From 6fce983f9b3ef51d47e647b2cff15049ef803781 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 13 Dec 2016 11:03:49 +0000 Subject: [PATCH 001/258] ASoC: dwc: Fix PIO mode initialization We can no longer rely on the return value of devm_snd_dmaengine_pcm_register(...) to check if the DMA handle is declared in the DT. Previously this check activated PIO mode but currently dma_request_chan returns either a valid channel or -EPROBE_DEFER. In order to activate PIO mode check instead if the interrupt line is declared. This reflects better what is documented in the DT bindings (see Documentation/devicetree/bindings/sound/ designware-i2s.txt). Also, initialize use_pio variable which was never being set causing PIO mode to never work. Signed-off-by: Jose Abreu Signed-off-by: Mark Brown --- sound/soc/dwc/designware_i2s.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c index 2998954a1c74..bdf8398cbc81 100644 --- a/sound/soc/dwc/designware_i2s.c +++ b/sound/soc/dwc/designware_i2s.c @@ -681,22 +681,19 @@ static int dw_i2s_probe(struct platform_device *pdev) } if (!pdata) { - ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); - if (ret == -EPROBE_DEFER) { - dev_err(&pdev->dev, - "failed to register PCM, deferring probe\n"); - return ret; - } else if (ret) { - dev_err(&pdev->dev, - "Could not register DMA PCM: %d\n" - "falling back to PIO mode\n", ret); + if (irq >= 0) { ret = dw_pcm_register(pdev); - if (ret) { - dev_err(&pdev->dev, - "Could not register PIO PCM: %d\n", + dev->use_pio = true; + } else { + ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, + 0); + dev->use_pio = false; + } + + if (ret) { + dev_err(&pdev->dev, "could not register pcm: %d\n", ret); - goto err_clk_disable; - } + goto err_clk_disable; } } From 0ea617a298dcdc2251b4e10f83ac3f3e627b66e3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Dec 2016 13:05:43 +0000 Subject: [PATCH 002/258] ASoC: rsnd: don't double free kctrl On an error, snd_ctl_add already free's kctrl, so calling snd_ctl_free_one to free it again leads to a double free error. Fix this by removing the extraneous snd_ctl_free_one call. Issue found using static analysis with CoverityScan, CID 1372908 Signed-off-by: Colin Ian King Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 4bd68de76130..99b5b0835c1e 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1030,10 +1030,8 @@ static int __rsnd_kctrl_new(struct rsnd_mod *mod, return -ENOMEM; ret = snd_ctl_add(card, kctrl); - if (ret < 0) { - snd_ctl_free_one(kctrl); + if (ret < 0) return ret; - } cfg->update = update; cfg->card = card; From c2b36129ce53a22b89dd2b88db33e7ffdefe0f41 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Dec 2016 14:17:47 +0000 Subject: [PATCH 003/258] ASoC: topology: kfree kcontrol->private_value before freeing kcontrol kcontrol->private_value is being kfree'd after kcontrol has been freed (in previous call to snd_ctl_remove). Instead, fix this by kfreeing the private_value before kcontrol. CoverityScan CID#1388311 "Read from pointer after free" Fixes: eea3dd4f1247a ("ASoC: topology: Only free TLV for volume mixers of a widget") Signed-off-by: Colin Ian King Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 65670b2b408c..fbfb1fab88d5 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -514,13 +514,12 @@ static void remove_widget(struct snd_soc_component *comp, == SND_SOC_TPLG_TYPE_MIXER) kfree(kcontrol->tlv.p); - snd_ctl_remove(card, kcontrol); - /* Private value is used as struct soc_mixer_control * for volume mixers or soc_bytes_ext for bytes * controls. */ kfree((void *)kcontrol->private_value); + snd_ctl_remove(card, kcontrol); } kfree(w->kcontrol_news); } From 9e4d59ada4d602e78eee9fb5f898ce61fdddb446 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 16 Dec 2016 18:26:54 +0900 Subject: [PATCH 004/258] ASoC: hdmi-codec: use unsigned type to structure members with bit-field This is a fix for Linux 4.10-rc1. In C language specification, a bit-field is interpreted as a signed or unsigned integer type consisting of the specified number of bits. In GCC manual, the range of a signed bit field of N bits is from -(2^N) / 2 to ((2^N) / 2) - 1 https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Bit-Fields Therefore, when defined as 1 bit-field with signed type, variables can represents -1 and 0. The snd-soc-hdmi-codec module includes a structure which has signed type members with bit-fields. Codes of this module assign 0 and 1 to the members. This seems to result in implementation-dependent behaviours. As of v4.10-rc1 merge window, outside of sound subsystem, this structure is referred by below GPU modules. - tda998x - sti-drm - mediatek-drm-hdmi - msm As long as I review their codes relevant to the structure, the structure members are used just for condition statements and printk formats. My proposal of change is a bit intrusive to the printk formats but this may be acceptable. Totally, it's reasonable to use unsigned type for the structure members. This bug is detected by Sparse, static code analyzer with below warnings. ./include/sound/hdmi-codec.h:39:26: error: dubious one-bit signed bitfield ./include/sound/hdmi-codec.h:40:28: error: dubious one-bit signed bitfield ./include/sound/hdmi-codec.h:41:29: error: dubious one-bit signed bitfield ./include/sound/hdmi-codec.h:42:31: error: dubious one-bit signed bitfield Fixes: 09184118a8ab ("ASoC: hdmi-codec: Add hdmi-codec for external HDMI-encoders") Signed-off-by: Takashi Sakamoto Acked-by: Arnaud Pouliquen Signed-off-by: Mark Brown CC: stable@vger.kernel.org --- include/sound/hdmi-codec.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h index 530c57bdefa0..915c4357945c 100644 --- a/include/sound/hdmi-codec.h +++ b/include/sound/hdmi-codec.h @@ -36,10 +36,10 @@ struct hdmi_codec_daifmt { HDMI_AC97, HDMI_SPDIF, } fmt; - int bit_clk_inv:1; - int frame_clk_inv:1; - int bit_clk_master:1; - int frame_clk_master:1; + unsigned int bit_clk_inv:1; + unsigned int frame_clk_inv:1; + unsigned int bit_clk_master:1; + unsigned int frame_clk_master:1; }; /* From 65dadffddbe44a60f8be9e95f264949ba1e547e9 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Fri, 16 Dec 2016 14:43:39 -0800 Subject: [PATCH 005/258] Input: joydev - remove unused linux/miscdevice.h include This patch remove the inclusion of linux/miscdevice.h for joydev since it does not use miscdevice. Signed-off-by: Corentin Labbe Signed-off-by: Dmitry Torokhov --- drivers/input/joydev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index f3135ae22df4..abd18f31b24f 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include From 41c567a5d7d1a986763e58c3394782813c3bcb03 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Sun, 18 Dec 2016 15:26:12 -0800 Subject: [PATCH 006/258] Input: i8042 - add Pegatron touchpad to noloop table Avoid AUX loopback in Pegatron C15B touchpad, so input subsystem is able to recognize a Synaptics touchpad in the AUX port. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=93791 (Touchpad is not detected on DNS 0801480 notebook (PEGATRON C15B)) Suggested-by: Dmitry Torokhov Signed-off-by: Marcos Paulo de Souza Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 73a4e68448fc..381d802fe853 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -211,6 +211,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), + DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), + }, + }, { } }; From 67626c9323027534496d21cf32793121662d03c0 Mon Sep 17 00:00:00 2001 From: Aniroop Mathur Date: Sun, 18 Dec 2016 15:27:16 -0800 Subject: [PATCH 007/258] Input: synaptics_i2c - change msleep to usleep_range for small msecs msleep(1~20) may not do what the caller intends, and will often sleep longer. (~20 ms actual sleep for any value given in the 1~20ms range) This is not the desired behaviour for many cases like device resume time, device suspend time, device enable time, retry logic, etc. Thus, change msleep to usleep_range for precise wakeups. Signed-off-by: Aniroop Mathur Acked-by: Igor Grinberg Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c index aa7c5da60800..cb2bf203f4ca 100644 --- a/drivers/input/mouse/synaptics_i2c.c +++ b/drivers/input/mouse/synaptics_i2c.c @@ -29,7 +29,7 @@ * after soft reset, we should wait for 1 ms * before the device becomes operational */ -#define SOFT_RESET_DELAY_MS 3 +#define SOFT_RESET_DELAY_US 3000 /* and after hard reset, we should wait for max 500ms */ #define HARD_RESET_DELAY_MS 500 @@ -311,7 +311,7 @@ static int synaptics_i2c_reset_config(struct i2c_client *client) if (ret) { dev_err(&client->dev, "Unable to reset device\n"); } else { - msleep(SOFT_RESET_DELAY_MS); + usleep_range(SOFT_RESET_DELAY_US, SOFT_RESET_DELAY_US + 100); ret = synaptics_i2c_config(client); if (ret) dev_err(&client->dev, "Unable to config device\n"); From 4a8b3a682be9addff7dbd16371fa8c34103b5c31 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 16 Dec 2016 10:55:49 -0600 Subject: [PATCH 008/258] ASoC: Intel: bytcr_rt5640: fallback mechanism if MCLK is not enabled Commit df1a2776a795 ("ASoC: Intel: bytcr_rt5640: add MCLK support") was merged but the corresponding clock framework patches have not, after being bumped from audio to clock to x86 domains. The missing clock-related patches result in a regression starting with 4.9 with the audio card not being created. Rather than reverting this commit and all following updates already queued up for 4.10, handle run-time dependency on MCLK and fall back to the previous bit-clock mode. This provides the same functionality as in 4.8 for Baytrail devices. On Baytrail-CR most devices remain silent with this fallback but additional patches are needed anyway. As suggested by Mark Brown, the fallback is only allowed with -ENOENT, all other run-time errors, including -EPROBE_DEFER, will stop the probe with no sound card registered. This patch should be applied to -stable as well as ASoC 4.10 fixes Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 507a86a5eafe..e33e4777a65c 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -825,10 +825,20 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) if ((byt_rt5640_quirk & BYT_RT5640_MCLK_EN) && (is_valleyview())) { priv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3"); if (IS_ERR(priv->mclk)) { + ret_val = PTR_ERR(priv->mclk); + dev_err(&pdev->dev, - "Failed to get MCLK from pmc_plt_clk_3: %ld\n", - PTR_ERR(priv->mclk)); - return PTR_ERR(priv->mclk); + "Failed to get MCLK from pmc_plt_clk_3: %d\n", + ret_val); + + /* + * Fall back to bit clock usage for -ENOENT (clock not + * available likely due to missing dependencies), bail + * for all other errors, including -EPROBE_DEFER + */ + if (ret_val != -ENOENT) + return ret_val; + byt_rt5640_quirk &= ~BYT_RT5640_MCLK_EN; } } From 73ba39ab9307340dc98ec3622891314bbc09cc2e Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 12:51:53 +0800 Subject: [PATCH 009/258] btrfs: return the actual error value from from btrfs_uuid_tree_iterate In function btrfs_uuid_tree_iterate(), errno is assigned to variable ret on errors. However, it directly returns 0. It may be better to return ret. This patch also removes the warning, because the caller already prints a warning. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188731 Signed-off-by: Pan Bian Reviewed-by: Omar Sandoval [ edited subject ] Signed-off-by: David Sterba --- fs/btrfs/uuid-tree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 161342b73ce5..726f928238d0 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -352,7 +352,5 @@ skip: out: btrfs_free_path(path); - if (ret) - btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret); - return 0; + return ret; } From 1cab2a84f470e15ecc8e5143bfe9398c6e888032 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 20 Dec 2016 10:29:12 +0000 Subject: [PATCH 010/258] ASoC: wm_adsp: Don't overrun firmware file buffer when reading region data Protect against corrupt firmware files by ensuring that the length we get for the data in a region actually lies within the available firmware file data buffer. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 593b7d1aed46..d72ccef9e238 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1551,7 +1551,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) const struct wmfw_region *region; const struct wm_adsp_region *mem; const char *region_name; - char *file, *text; + char *file, *text = NULL; struct wm_adsp_buf *buf; unsigned int reg; int regions = 0; @@ -1700,10 +1700,21 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); + if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > + firmware->size) { + adsp_err(dsp, + "%s.%d: %s region len %d bytes exceeds file length %zu\n", + file, regions, region_name, + le32_to_cpu(region->len), firmware->size); + ret = -EINVAL; + goto out_fw; + } + if (text) { memcpy(text, region->data, le32_to_cpu(region->len)); adsp_info(dsp, "%s: %s\n", file, text); kfree(text); + text = NULL; } if (reg) { @@ -1748,6 +1759,7 @@ out_fw: regmap_async_complete(regmap); wm_adsp_buf_free(&buf_list); release_firmware(firmware); + kfree(text); out: kfree(file); @@ -2233,6 +2245,17 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { + if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > + firmware->size) { + adsp_err(dsp, + "%s.%d: %s region len %d bytes exceeds file length %zu\n", + file, blocks, region_name, + le32_to_cpu(blk->len), + firmware->size); + ret = -EINVAL; + goto out_fw; + } + buf = wm_adsp_buf_alloc(blk->data, le32_to_cpu(blk->len), &buf_list); From 7dbbf0fa1bf14c17900bb8057986b06db3822239 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Nov 2016 16:17:50 -0800 Subject: [PATCH 011/258] scsi: scsi-mq: Wait for .queue_rq() if necessary Ensure that if scsi-mq is enabled that scsi_internal_device_block() waits until ongoing shost->hostt->queuecommand() calls have finished. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen Cc: James Bottomley Cc: Christoph Hellwig Cc: Doug Ledford Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index c35b6de4ca64..9fd9a977c695 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2893,7 +2893,7 @@ scsi_internal_device_block(struct scsi_device *sdev) * request queue. */ if (q->mq_ops) { - blk_mq_stop_hw_queues(q); + blk_mq_quiesce_queue(q); } else { spin_lock_irqsave(q->queue_lock, flags); blk_stop_queue(q); From 7961d53d22375bb9e8ae8063533b9059102ed39d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 20 Dec 2016 08:43:30 -0800 Subject: [PATCH 012/258] scsi: qedi: fix build, depends on UIO Fix build of SCSI qedi driver. It uses uio interfaces so it should depend on UIO. ERROR: "uio_unregister_device" [drivers/scsi/qedi/qedi.ko] undefined! ERROR: "uio_event_notify" [drivers/scsi/qedi/qedi.ko] undefined! ERROR: "__uio_register_device" [drivers/scsi/qedi/qedi.ko] undefined! Signed-off-by: Randy Dunlap Cc: QLogic-Storage-Upstream@cavium.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/Kconfig b/drivers/scsi/qedi/Kconfig index 23ca8a274586..21331453db7b 100644 --- a/drivers/scsi/qedi/Kconfig +++ b/drivers/scsi/qedi/Kconfig @@ -1,6 +1,6 @@ config QEDI tristate "QLogic QEDI 25/40/100Gb iSCSI Initiator Driver Support" - depends on PCI && SCSI + depends on PCI && SCSI && UIO depends on QED select SCSI_ISCSI_ATTRS select QED_LL2 From b6fc513da50c5dbc457a8ad6b58b046a6a68fd9d Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Tue, 27 Dec 2016 11:44:51 -0800 Subject: [PATCH 013/258] Input: xpad - use correct product id for x360w controllers currently the controllers get the same product id as the wireless receiver. However the controllers actually have their own product id. The patch makes the driver expose the same product id as the windows driver. This improves compatibility when running applications with WINE. see https://github.com/paroj/xpad/issues/54 Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 6d9499658671..c7d5b2b643d1 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1377,6 +1377,12 @@ static int xpad_init_input(struct usb_xpad *xpad) input_dev->name = xpad->name; input_dev->phys = xpad->phys; usb_to_input_id(xpad->udev, &input_dev->id); + + if (xpad->xtype == XTYPE_XBOX360W) { + /* x360w controllers and the receiver have different ids */ + input_dev->id.product = 0x02a1; + } + input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); From d7ddad0acc4add42567f7879b116a0b9eea31860 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 27 Dec 2016 11:32:55 -0800 Subject: [PATCH 014/258] Input: synaptics-rmi4 - fix F03 build error when serio is module Since F03 is a boolean, "depends" on symbols that can be modules do not work quite right. We can enable F03 if SERIO is built-in or if both RMI core and SERIO core are modules. If SERIO core is module, but RMI is built-in, we'll get: drivers/built-in.o: In function `rmi_f03_attention': rmi_f03.c:(.text+0xf8ef8): undefined reference to `serio_interrupt' rmi_f03.c:(.text+0xf8fbd): undefined reference to `serio_interrupt' drivers/built-in.o: In function `rmi_f03_remove': rmi_f03.c:(.text+0xf9082): undefined reference to `serio_unregister_port' drivers/built-in.o: In function `rmi_f03_probe': rmi_f03.c:(.text+0xf9260): undefined reference to `__serio_register_port' Reported-by: Randy Dunlap Reviewed-by: Guenter Roeck Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index 30cc627a4f45..8993983e3fe4 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -41,7 +41,8 @@ config RMI4_SMB config RMI4_F03 bool "RMI4 Function 03 (PS2 Guest)" - depends on RMI4_CORE && SERIO + depends on RMI4_CORE + depends on SERIO=y || RMI4_CORE=SERIO help Say Y here if you want to add support for RMI4 function 03. From 9396c9cb0d9534ca67bda8a34b2413a2ca1c67e9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:50 +0900 Subject: [PATCH 015/258] perf sched timehist: Show total scheduling time Show length of analyzed sample time and rate of idle task running. This also takes care of time range given by --time option. $ perf sched timehist -sI | tail Samples do not have callchains. Idle stats: CPU 0 idle for 930.316 msec ( 92.93%) CPU 1 idle for 963.614 msec ( 96.25%) CPU 2 idle for 885.482 msec ( 88.45%) CPU 3 idle for 938.635 msec ( 93.76%) Total number of unique tasks: 118 Total number of context switches: 2337 Total run time (msec): 3718.048 Total scheduling time (msec): 1001.131 (x 4) Suggested-by: David Ahern Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d53e706a6f17..5b134b0d1ff3 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -209,6 +209,7 @@ struct perf_sched { u64 skipped_samples; const char *time_str; struct perf_time_interval ptime; + struct perf_time_interval hist_time; }; /* per thread run time data */ @@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool, timehist_print_sample(sched, sample, &al, thread, t); out: + if (sched->hist_time.start == 0 && t >= ptime->start) + sched->hist_time.start = t; + if (ptime->end == 0 || t <= ptime->end) + sched->hist_time.end = t; + if (tr) { /* time of this sched_switch event becomes last time task seen */ tr->last_time = sample->time; @@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched, struct thread *t; struct thread_runtime *r; int i; + u64 hist_time = sched->hist_time.end - sched->hist_time.start; memset(&totals, 0, sizeof(totals)); @@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched, totals.sched_count += r->run_stats.n; printf(" CPU %2d idle for ", i); print_sched_time(r->total_run_time, 6); - printf(" msec\n"); + printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time); } else printf(" CPU %2d idle entire time window\n", i); } @@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched, printf("\n" " Total number of unique tasks: %" PRIu64 "\n" - "Total number of context switches: %" PRIu64 "\n" - " Total run time (msec): ", + "Total number of context switches: %" PRIu64 "\n", totals.task_count, totals.sched_count); + printf(" Total run time (msec): "); print_sched_time(totals.total_run_time, 2); printf("\n"); + + printf(" Total scheduling time (msec): "); + print_sched_time(hist_time, 2); + printf(" (x %d)\n", sched->max_cpu); } typedef int (*sched_handler)(struct perf_tool *tool, From ee12996c9d392ec61241ab6c74d257bc2122e0bc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Dec 2016 21:49:17 -0300 Subject: [PATCH 016/258] samples/bpf sock_example: Avoid getting ethhdr from two includes To avoid the following build failure on Alpine Linux 3.4, that has clang-3.8 with the bpf target: HOSTCC samples/bpf/sock_example.o In file included from /usr/include/net/ethernet.h:10:0, from /git/linux/samples/bpf/sock_example.h:7, from /git/linux/samples/bpf/sock_example.c:30: /usr/include/netinet/if_ether.h:96:8: error: redefinition of 'struct ethhdr' struct ethhdr { ^ In file included from /git/linux/samples/bpf/sock_example.c:26:0: ./usr/include/linux/if_ether.h:144:8: note: originally defined here struct ethhdr { ^ scripts/Makefile.host:124: recipe for target 'samples/bpf/sock_example.o' failed make[2]: *** [samples/bpf/sock_example.o] Error 1 /git/linux/Makefile:1658: recipe for target 'samples/bpf/' failed So include net/if_ether.h for the needs of sock_example.h, using the same include that sock_example.c uses. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Joe Stringer Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-m9avekl1b651qe1r1zd5tzz9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/sock_example.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h index 09f7fe7e5fd7..d8014065d479 100644 --- a/samples/bpf/sock_example.h +++ b/samples/bpf/sock_example.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include From abfb7b686a3e5be27bf81db62f9c5c895b76f5d1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 24 Dec 2016 13:59:23 +0000 Subject: [PATCH 017/258] efi/libstub/arm*: Pass latest memory map to the kernel As reported by James Morse, the current libstub code involving the annotated memory map only works somewhat correctly by accident, due to the fact that a pool allocation happens to be reused immediately, retaining its former contents on most implementations of the UEFI boot services. Instead of juggling memory maps, which makes the code more complex than it needs to be, simply put placeholder values into the FDT for the memory map parameters, and only write the actual values after ExitBootServices() has been called. Reported-by: James Morse Signed-off-by: Ard Biesheuvel Cc: Cc: Jeffrey Hugo Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Fixes: ed9cc156c42f ("efi/libstub: Use efi_exit_boot_services() in FDT") Link: http://lkml.kernel.org/r/1482587963-20183-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/efistub.h | 8 --- drivers/firmware/efi/libstub/fdt.c | 87 +++++++++++++++++--------- 2 files changed, 56 insertions(+), 39 deletions(-) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index b98824e3800a..0e2a96b12cb3 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -39,14 +39,6 @@ efi_status_t efi_file_close(void *handle); unsigned long get_dram_base(efi_system_table_t *sys_table_arg); -efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size, - efi_memory_desc_t *memory_map, - unsigned long map_size, unsigned long desc_size, - u32 desc_ver); - efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, void *handle, unsigned long *new_fdt_addr, diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index a6a93116a8f0..921dfa047202 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -16,13 +16,10 @@ #include "efistub.h" -efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size, - efi_memory_desc_t *memory_map, - unsigned long map_size, unsigned long desc_size, - u32 desc_ver) +static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, + unsigned long orig_fdt_size, + void *fdt, int new_fdt_size, char *cmdline_ptr, + u64 initrd_addr, u64 initrd_size) { int node, num_rsv; int status; @@ -101,25 +98,23 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (status) goto fdt_set_fail; - fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map); + fdt_val64 = U64_MAX; /* placeholder */ status = fdt_setprop(fdt, node, "linux,uefi-mmap-start", &fdt_val64, sizeof(fdt_val64)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(map_size); + fdt_val32 = U32_MAX; /* placeholder */ status = fdt_setprop(fdt, node, "linux,uefi-mmap-size", &fdt_val32, sizeof(fdt_val32)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(desc_size); status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size", &fdt_val32, sizeof(fdt_val32)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(desc_ver); status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver", &fdt_val32, sizeof(fdt_val32)); if (status) @@ -148,6 +143,43 @@ fdt_set_fail: return EFI_LOAD_ERROR; } +static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) +{ + int node = fdt_path_offset(fdt, "/chosen"); + u64 fdt_val64; + u32 fdt_val32; + int err; + + if (node < 0) + return EFI_LOAD_ERROR; + + fdt_val64 = cpu_to_fdt64((unsigned long)*map->map); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-start", + &fdt_val64, sizeof(fdt_val64)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->map_size); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-size", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->desc_size); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-size", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->desc_ver); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-ver", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + return EFI_SUCCESS; +} + #ifndef EFI_FDT_ALIGN #define EFI_FDT_ALIGN EFI_PAGE_SIZE #endif @@ -243,20 +275,10 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, goto fail; } - /* - * Now that we have done our final memory allocation (and free) - * we can get the memory map key needed for - * exit_boot_services(). - */ - status = efi_get_memory_map(sys_table, &map); - if (status != EFI_SUCCESS) - goto fail_free_new_fdt; - status = update_fdt(sys_table, (void *)fdt_addr, fdt_size, (void *)*new_fdt_addr, new_fdt_size, - cmdline_ptr, initrd_addr, initrd_size, - memory_map, map_size, desc_size, desc_ver); + cmdline_ptr, initrd_addr, initrd_size); /* Succeeding the first time is the expected case. */ if (status == EFI_SUCCESS) @@ -266,20 +288,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, /* * We need to allocate more space for the new * device tree, so free existing buffer that is - * too small. Also free memory map, as we will need - * to get new one that reflects the free/alloc we do - * on the device tree buffer. + * too small. */ efi_free(sys_table, new_fdt_size, *new_fdt_addr); - sys_table->boottime->free_pool(memory_map); new_fdt_size += EFI_PAGE_SIZE; } else { pr_efi_err(sys_table, "Unable to construct new device tree.\n"); - goto fail_free_mmap; + goto fail_free_new_fdt; } } - sys_table->boottime->free_pool(memory_map); priv.runtime_map = runtime_map; priv.runtime_entry_count = &runtime_entry_count; status = efi_exit_boot_services(sys_table, handle, &map, &priv, @@ -288,6 +306,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; + status = update_fdt_memmap((void *)*new_fdt_addr, &map); + if (status != EFI_SUCCESS) { + /* + * The kernel won't get far without the memory map, but + * may still be able to print something meaningful so + * return success here. + */ + return EFI_SUCCESS; + } + /* Install the new virtual address map */ svam = sys_table->runtime->set_virtual_address_map; status = svam(runtime_entry_count * desc_size, desc_size, @@ -319,9 +347,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi_err(sys_table, "Exit boot services failed.\n"); -fail_free_mmap: - sys_table->boottime->free_pool(memory_map); - fail_free_new_fdt: efi_free(sys_table, new_fdt_size, *new_fdt_addr); From b6f4c66704b875aba9b8c912532323e3cc89824c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 28 Dec 2016 10:47:13 -0300 Subject: [PATCH 018/258] samples/bpf trace_output_user: Remove duplicate sys/ioctl.h include Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Joe Stringer Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3awp0nv8tpnblatojmwjww7z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/trace_output_user.c | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index f4fa6af22def..ccca1e348017 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include From 63447646ac657fde00bb658ce21a3431940ae0ad Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Thu, 15 Dec 2016 15:49:56 +0100 Subject: [PATCH 019/258] rpmsg: virtio_rpmsg_bus: fix channel creation Since commit 4dffed5b3ac796b ("rpmsg: Name rpmsg devices based on channel id"), it is no more possible for a firmware to register twice a service (on different endpoints). rpmsg_register_device function is failing when calling device_add for the second time as second device has the same name as first one already register. It is because name is based only on service name and so is not more unique. Previously name was unique thanks to the use of rpmsg_dev_index. This patch adds destination and source endpoint numbers device name to create an unique identifier. Fixes: 4dffed5b3ac7 ("rpmsg: Name rpmsg devices based on channel id") Acked-by: Peter Griffin Signed-off-by: Loic Pallardy [bjorn: flipped name and address in device name] Signed-off-by: Bjorn Andersson --- drivers/rpmsg/rpmsg_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index a79cb5a9e5f2..1cfb775e8e82 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -453,8 +453,8 @@ int rpmsg_register_device(struct rpmsg_device *rpdev) struct device *dev = &rpdev->dev; int ret; - dev_set_name(&rpdev->dev, "%s:%s", - dev_name(dev->parent), rpdev->id.name); + dev_set_name(&rpdev->dev, "%s.%s.%d.%d", dev_name(dev->parent), + rpdev->id.name, rpdev->src, rpdev->dst); rpdev->dev.bus = &rpmsg_bus; rpdev->dev.release = rpmsg_release_device; From c81c0e0710f031cb09eb7cbf0e75e6754d1d8346 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Wed, 14 Dec 2016 16:11:00 +0100 Subject: [PATCH 020/258] remoteproc: fix vdev reference management Commit 2b45cef5868a ("remoteproc: Further extend the vdev life cycle") extends kref support for vdev management. It introduces a regression when following sequence is executed: rproc_boot --> rproc_shutdown --> rproc_boot Second rproc_boot call crashes on register_virtio_device as device is already existing. Issue is previous vdev is never released when rproc is stop because associated refcount is too high. kref_get introduces is not needed as kref_init already initializes krefcount to 1 because it considers associated variable as used. This introduces a misalignment between kref_get and kref_put calls. Fixes: 2b45cef5868a ("remoteproc: Further extend the vdev life cycle") Signed-off-by: Loic Pallardy Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 9a507e77eced..feb24c43d4c7 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -396,9 +396,6 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc, goto unwind_vring_allocations; } - /* track the rvdevs list reference */ - kref_get(&rvdev->refcount); - list_add_tail(&rvdev->node, &rproc->rvdevs); rproc_add_subdev(rproc, &rvdev->subdev, From a0c10687ec9506b5e14fe3dd47832a77f2f2500c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 30 Dec 2016 03:21:38 -0800 Subject: [PATCH 021/258] Revert "remoteproc: Merge table_ptr and cached_table pointers" Following any fw_rsc_vdev entries in the resource table are two variable length arrays, the first one reference vring resources and the second one is the virtio config space. The virtio config space is used by virtio to communicate status and configuration changes and must as such be shared with the remote. The reverted commit incorrectly made any changes to the virtio config space only affect the local copy, in an attempt to allowing memory protection of the shared resource table. This reverts commit cda8529346935fc86f476999ac4fbfe4e17abf11. Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 26 ++++++++++++++++---------- include/linux/remoteproc.h | 4 +++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index feb24c43d4c7..90b05c72186c 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -886,13 +886,15 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) /* * Create a copy of the resource table. When a virtio device starts * and calls vring_new_virtqueue() the address of the allocated vring - * will be stored in the table_ptr. Before the device is started, - * table_ptr will be copied into device memory. + * will be stored in the cached_table. Before the device is started, + * cached_table will be copied into device memory. */ - rproc->table_ptr = kmemdup(table, tablesz, GFP_KERNEL); - if (!rproc->table_ptr) + rproc->cached_table = kmemdup(table, tablesz, GFP_KERNEL); + if (!rproc->cached_table) goto clean_up; + rproc->table_ptr = rproc->cached_table; + /* reset max_notifyid */ rproc->max_notifyid = -1; @@ -911,16 +913,18 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) } /* - * The starting device has been given the rproc->table_ptr as the + * The starting device has been given the rproc->cached_table as the * resource table. The address of the vring along with the other - * allocated resources (carveouts etc) is stored in table_ptr. + * allocated resources (carveouts etc) is stored in cached_table. * In order to pass this information to the remote device we must copy * this information to device memory. We also update the table_ptr so * that any subsequent changes will be applied to the loaded version. */ loaded_table = rproc_find_loaded_rsc_table(rproc, fw); - if (loaded_table) - memcpy(loaded_table, rproc->table_ptr, tablesz); + if (loaded_table) { + memcpy(loaded_table, rproc->cached_table, tablesz); + rproc->table_ptr = loaded_table; + } /* power up the remote processor */ ret = rproc->ops->start(rproc); @@ -948,7 +952,8 @@ stop_rproc: clean_up_resources: rproc_resource_cleanup(rproc); clean_up: - kfree(rproc->table_ptr); + kfree(rproc->cached_table); + rproc->cached_table = NULL; rproc->table_ptr = NULL; rproc_disable_iommu(rproc); @@ -1182,7 +1187,8 @@ void rproc_shutdown(struct rproc *rproc) rproc_disable_iommu(rproc); /* Free the copy of the resource table */ - kfree(rproc->table_ptr); + kfree(rproc->cached_table); + rproc->cached_table = NULL; rproc->table_ptr = NULL; /* if in crash state, unlock crash handler */ diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index e2f3a3281d8f..8265d351c9f0 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -408,7 +408,8 @@ enum rproc_crash_type { * @crash_comp: completion used to sync crash handler and the rproc reload * @recovery_disabled: flag that state if recovery was disabled * @max_notifyid: largest allocated notify id. - * @table_ptr: our copy of the resource table + * @table_ptr: pointer to the resource table in effect + * @cached_table: copy of the resource table * @has_iommu: flag to indicate if remote processor is behind an MMU */ struct rproc { @@ -440,6 +441,7 @@ struct rproc { bool recovery_disabled; int max_notifyid; struct resource_table *table_ptr; + struct resource_table *cached_table; bool has_iommu; bool auto_boot; }; From d2e3a1358c37cd82eef92b5e908b4f0472194481 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 29 Dec 2016 14:11:21 +0100 Subject: [PATCH 022/258] ASoC: Fix binding and probing of auxiliary components Currently binding of auxiliary devices doesn't work as in soc_bind_aux_dev() function a bound component is not being added to any list and in soc_probe_aux_devices() we are trying to walk the component_dev_list list to probe auxiliary components but at that time this list doesn't contain any auxiliary components since they are being added to the card only in soc_probe_component(). This patch adds a list to the card where are stored bound but not probed auxiliary devices, so that all aux devices can be probed. Fixes: 1a653aa44725 "ASoC: core: replace aux_comp_list to component_dev_list" Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown --- include/sound/soc.h | 3 +++ sound/soc/soc-core.c | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 2b502f6cc6d0..b86168a21d56 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -813,6 +813,7 @@ struct snd_soc_component { unsigned int suspended:1; /* is in suspend PM state */ struct list_head list; + struct list_head card_aux_list; /* for auxiliary bound components */ struct list_head card_list; struct snd_soc_dai_driver *dai_drv; @@ -1152,6 +1153,7 @@ struct snd_soc_card { */ struct snd_soc_aux_dev *aux_dev; int num_aux_devs; + struct list_head aux_comp_list; const struct snd_kcontrol_new *controls; int num_controls; @@ -1547,6 +1549,7 @@ static inline void snd_soc_initialize_card_lists(struct snd_soc_card *card) INIT_LIST_HEAD(&card->widgets); INIT_LIST_HEAD(&card->paths); INIT_LIST_HEAD(&card->dapm_list); + INIT_LIST_HEAD(&card->aux_comp_list); INIT_LIST_HEAD(&card->component_dev_list); } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index f1901bb1466e..baa1afa41e3d 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1748,6 +1748,7 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num) component->init = aux_dev->init; component->auxiliary = 1; + list_add(&component->card_aux_list, &card->aux_comp_list); return 0; @@ -1758,16 +1759,14 @@ err_defer: static int soc_probe_aux_devices(struct snd_soc_card *card) { - struct snd_soc_component *comp; + struct snd_soc_component *comp, *tmp; int order; int ret; for (order = SND_SOC_COMP_ORDER_FIRST; order <= SND_SOC_COMP_ORDER_LAST; order++) { - list_for_each_entry(comp, &card->component_dev_list, card_list) { - if (!comp->auxiliary) - continue; - + list_for_each_entry_safe(comp, tmp, &card->aux_comp_list, + card_aux_list) { if (comp->driver->probe_order == order) { ret = soc_probe_component(card, comp); if (ret < 0) { @@ -1776,6 +1775,7 @@ static int soc_probe_aux_devices(struct snd_soc_card *card) comp->name, ret); return ret; } + list_del(&comp->card_aux_list); } } } From 63c3194b82530bd71fd49db84eb7ab656b8d404a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 23 Dec 2016 11:21:10 +0200 Subject: [PATCH 023/258] ASoC: tlv320aic3x: Mark the RESET register as volatile The RESET register only have one self clearing bit and it should not be cached. If it is cached, when we sync the registers back to the chip we will initiate a software reset as well, which is not desirable. Signed-off-by: Peter Ujfalusi Reviewed-by: Jarkko Nikula Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic3x.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 8877b74b0510..bb94d50052d7 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -126,6 +126,16 @@ static const struct reg_default aic3x_reg[] = { { 108, 0x00 }, { 109, 0x00 }, }; +static bool aic3x_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case AIC3X_RESET: + return true; + default: + return false; + } +} + static const struct regmap_config aic3x_regmap = { .reg_bits = 8, .val_bits = 8, @@ -133,6 +143,9 @@ static const struct regmap_config aic3x_regmap = { .max_register = DAC_ICC_ADJ, .reg_defaults = aic3x_reg, .num_reg_defaults = ARRAY_SIZE(aic3x_reg), + + .volatile_reg = aic3x_volatile_reg, + .cache_type = REGCACHE_RBTREE, }; From 91ce54978ccece323aa6df930249ff84a7d233c7 Mon Sep 17 00:00:00 2001 From: G Kranthi Date: Tue, 20 Dec 2016 12:46:45 +0530 Subject: [PATCH 024/258] ASoC: Intel: Skylake: Fix to fail safely if module not available in path If a module is not available in a pipeline, fail safely rather than causing oops. Signed-off-by: G Kranthi Signed-off-by: Subhransu S. Prusty Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 84b5101e6ca6..6c6b63a6b338 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -180,6 +180,9 @@ static int skl_pcm_open(struct snd_pcm_substream *substream, snd_pcm_set_sync(substream); mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream); + if (!mconfig) + return -EINVAL; + skl_tplg_d0i3_get(skl, mconfig->d0i3_caps); return 0; From a33b56a6a824fa5cd89c74f85cbeb9af1dcef87e Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 20 Dec 2016 16:47:06 +0800 Subject: [PATCH 025/258] ASoC: nau8825: correct the function name of register Change to correct name of the register function. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8825.c | 6 +++--- sound/soc/codecs/nau8825.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index efe3a44658d5..abf77dd422f4 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -561,9 +561,9 @@ static void nau8825_xtalk_prepare(struct nau8825 *nau8825) nau8825_xtalk_backup(nau8825); /* Config IIS as master to output signal by codec */ regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2, - NAU8825_I2S_MS_MASK | NAU8825_I2S_DRV_MASK | + NAU8825_I2S_MS_MASK | NAU8825_I2S_LRC_DIV_MASK | NAU8825_I2S_BLK_DIV_MASK, NAU8825_I2S_MS_MASTER | - (0x2 << NAU8825_I2S_DRV_SFT) | 0x1); + (0x2 << NAU8825_I2S_LRC_DIV_SFT) | 0x1); /* Ramp up headphone volume to 0dB to get better performance and * avoid pop noise in headphone. */ @@ -657,7 +657,7 @@ static void nau8825_xtalk_clean(struct nau8825 *nau8825) NAU8825_IRQ_RMS_EN, NAU8825_IRQ_RMS_EN); /* Recover default value for IIS */ regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2, - NAU8825_I2S_MS_MASK | NAU8825_I2S_DRV_MASK | + NAU8825_I2S_MS_MASK | NAU8825_I2S_LRC_DIV_MASK | NAU8825_I2S_BLK_DIV_MASK, NAU8825_I2S_MS_SLAVE); /* Restore value of specific register for cross talk */ nau8825_xtalk_restore(nau8825); diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h index 5d1704e73241..b6b21b312854 100644 --- a/sound/soc/codecs/nau8825.h +++ b/sound/soc/codecs/nau8825.h @@ -247,8 +247,8 @@ /* I2S_PCM_CTRL2 (0x1d) */ #define NAU8825_I2S_TRISTATE (1 << 15) /* 0 - normal mode, 1 - Hi-Z output */ -#define NAU8825_I2S_DRV_SFT 12 -#define NAU8825_I2S_DRV_MASK (0x3 << NAU8825_I2S_DRV_SFT) +#define NAU8825_I2S_LRC_DIV_SFT 12 +#define NAU8825_I2S_LRC_DIV_MASK (0x3 << NAU8825_I2S_LRC_DIV_SFT) #define NAU8825_I2S_MS_SFT 3 #define NAU8825_I2S_MS_MASK (1 << NAU8825_I2S_MS_SFT) #define NAU8825_I2S_MS_MASTER (1 << NAU8825_I2S_MS_SFT) From a1792cda51300e15b03549cccf0b09f3be82e697 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 20 Dec 2016 12:03:09 +0800 Subject: [PATCH 026/258] ASoC: nau8825: fix invalid configuration in Pre-Scalar of FLL The clk_ref_div is not configured in the correct position of the register. The patch fixes that clk_ref_div, Pre-Scalar, is assigned the wrong value. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8825.c | 3 ++- sound/soc/codecs/nau8825.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index abf77dd422f4..4576f987a4a5 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -2006,7 +2006,8 @@ static void nau8825_fll_apply(struct nau8825 *nau8825, NAU8825_FLL_INTEGER_MASK, fll_param->fll_int); /* FLL pre-scaler */ regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL4, - NAU8825_FLL_REF_DIV_MASK, fll_param->clk_ref_div); + NAU8825_FLL_REF_DIV_MASK, + fll_param->clk_ref_div << NAU8825_FLL_REF_DIV_SFT); /* select divided VCO input */ regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL5, NAU8825_FLL_CLK_SW_MASK, NAU8825_FLL_CLK_SW_REF); diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h index b6b21b312854..514fd13c2f46 100644 --- a/sound/soc/codecs/nau8825.h +++ b/sound/soc/codecs/nau8825.h @@ -137,7 +137,8 @@ #define NAU8825_FLL_CLK_SRC_FS (0x3 << NAU8825_FLL_CLK_SRC_SFT) /* FLL4 (0x07) */ -#define NAU8825_FLL_REF_DIV_MASK (0x3 << 10) +#define NAU8825_FLL_REF_DIV_SFT 10 +#define NAU8825_FLL_REF_DIV_MASK (0x3 << NAU8825_FLL_REF_DIV_SFT) /* FLL5 (0x08) */ #define NAU8825_FLL_PDB_DAC_EN (0x1 << 15) From 1594c18fd297a8edcc72bc4b161f3f52603ebb92 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 13 Dec 2016 11:15:21 -0700 Subject: [PATCH 027/258] dmaengine: ioatdma: Add Skylake PCI Dev ID Adding Skylake Xeon PCI device ids for ioatdma and related bits. Signed-off-by: Dave Jiang Signed-off-by: Vinod Koul --- drivers/dma/ioat/hw.h | 2 ++ drivers/dma/ioat/init.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 8e67895bcca3..abcc51b343ce 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -64,6 +64,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e #define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f +#define PCI_DEVICE_ID_INTEL_IOAT_SKX 0x2021 + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 90eddd9f07e4..51b2b643ba71 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -106,6 +106,8 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) }, + /* I/OAT v3.3 platforms */ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, @@ -243,10 +245,15 @@ static bool is_bdx_ioat(struct pci_dev *pdev) } } +static inline bool is_skx_ioat(struct pci_dev *pdev) +{ + return (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX) ? true : false; +} + static bool is_xeon_cb32(struct pci_dev *pdev) { return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev) || is_bdx_ioat(pdev); + is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev); } bool is_bwd_ioat(struct pci_dev *pdev) From 34a31f0af84158955a9747fb5c6712da5bbb5331 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 13 Dec 2016 11:15:27 -0700 Subject: [PATCH 028/258] dmaengine: ioatdma: workaround SKX ioatdma version The Skylake ioatdma is technically CBDMA 3.2+ and contains the same hardware bits with some additional 3.3 features, but it's not really 3.3 where the driver is concerned. Signed-off-by: Dave Jiang Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 51b2b643ba71..ace5cb2cb12f 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1364,6 +1364,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) device->version = readb(device->reg_base + IOAT_VER_OFFSET); if (device->version >= IOAT_VER_3_0) { + if (is_skx_ioat(pdev)) + device->version = IOAT_VER_3_2; err = ioat3_dma_probe(device, ioat_dca_enabled); if (device->version >= IOAT_VER_3_3) From 1032471b3ec823bce7687034ac5af78a8ac99a9c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 15 Dec 2016 11:43:30 +0100 Subject: [PATCH 029/258] dmaengine: dw: fix typo in Kconfig platfroms -> platforms Signed-off-by: Jean Delvare Fixes: fed42c198b45 ("dma: dw: add PCI part of the driver") Cc: Viresh Kumar Acked-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index e00c9b022964..5a37b9fcf40d 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -24,5 +24,5 @@ config DW_DMAC_PCI select DW_DMAC_CORE help Support the Synopsys DesignWare AHB DMA controller on the - platfroms that enumerate it as a PCI device. For example, + platforms that enumerate it as a PCI device. For example, Intel Medfield has integrated this GPDMA controller. From 7e96304d99477de1f70db42035071e56439da817 Mon Sep 17 00:00:00 2001 From: M'boumba Cedric Madianga Date: Tue, 13 Dec 2016 14:40:43 +0100 Subject: [PATCH 030/258] dmaengine: stm32-dma: Set correct args number for DMA request from DT This patch sets the right number of arguments to be used for DMA clients which request channels from DT. Signed-off-by: M'boumba Cedric Madianga Reviewed-by: Ludovic BARRE Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 3688d0873a3e..a884b857cba4 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -972,21 +972,18 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, struct stm32_dma_chan *chan; struct dma_chan *c; - if (dma_spec->args_count < 3) + if (dma_spec->args_count < 4) return NULL; cfg.channel_id = dma_spec->args[0]; cfg.request_line = dma_spec->args[1]; cfg.stream_config = dma_spec->args[2]; - cfg.threshold = 0; + cfg.threshold = dma_spec->args[3]; if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) return NULL; - if (dma_spec->args_count > 3) - cfg.threshold = dma_spec->args[3]; - chan = &dmadev->chan[cfg.channel_id]; c = dma_get_slave_channel(&chan->vchan.chan); From 57b5a32135c813f2ab669039fb4ec16b30cb3305 Mon Sep 17 00:00:00 2001 From: M'boumba Cedric Madianga Date: Tue, 13 Dec 2016 14:40:46 +0100 Subject: [PATCH 031/258] dmaengine: stm32-dma: Fix null pointer dereference in stm32_dma_tx_status chan->desc is always set to NULL when a DMA transfer is complete. As a DMA transfer could be complete during the call of stm32_dma_tx_status, we need to be sure that chan->desc is not NULL before using this variable to avoid a null pointer deference issue. Signed-off-by: M'boumba Cedric Madianga Reviewed-by: Ludovic BARRE Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index a884b857cba4..3056ce7f8c69 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -880,7 +880,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, struct virt_dma_desc *vdesc; enum dma_status status; unsigned long flags; - u32 residue; + u32 residue = 0; status = dma_cookie_status(c, cookie, state); if ((status == DMA_COMPLETE) || (!state)) @@ -888,16 +888,12 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, spin_lock_irqsave(&chan->vchan.lock, flags); vdesc = vchan_find_desc(&chan->vchan, cookie); - if (cookie == chan->desc->vdesc.tx.cookie) { + if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) residue = stm32_dma_desc_residue(chan, chan->desc, chan->next_sg); - } else if (vdesc) { + else if (vdesc) residue = stm32_dma_desc_residue(chan, to_stm32_dma_desc(vdesc), 0); - } else { - residue = 0; - } - dma_set_residue(state, residue); spin_unlock_irqrestore(&chan->vchan.lock, flags); From 75bdc7f31a3a6e9a12e218b31a44a1f54a91554c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 19 Dec 2016 06:33:51 +0100 Subject: [PATCH 032/258] dmaengine: ti-dma-crossbar: Add some 'of_node_put()' in error path. Add some missing 'of_node_put()' in early exit error path. Signed-off-by: Christophe JAILLET Signed-off-by: Vinod Koul --- drivers/dma/ti-dma-crossbar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 3f24aeb48c0e..2403475a37cf 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -149,6 +149,7 @@ static int ti_am335x_xbar_probe(struct platform_device *pdev) match = of_match_node(ti_am335x_master_match, dma_node); if (!match) { dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); return -EINVAL; } @@ -339,6 +340,7 @@ static int ti_dra7_xbar_probe(struct platform_device *pdev) match = of_match_node(ti_dra7_master_match, dma_node); if (!match) { dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); return -EINVAL; } From 73529c872a189c747bdb528ce9b85b67b0e28dec Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 5 Dec 2016 22:14:36 +0100 Subject: [PATCH 033/258] mtd: nand: xway: disable module support The xway_nand driver accesses the ltq_ebu_membase symbol which is not exported. This also should not get exported and we should handle the EBU interface in a better way later. This quick fix just deactivated support for building as module. Fixes: 99f2b107924c ("mtd: lantiq: Add NAND support on Lantiq XWAY SoC.") Cc: Signed-off-by: Hauke Mehrtens Signed-off-by: Boris Brezillon --- drivers/mtd/nand/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 353a9ddf6b97..2f35ab5426f8 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -540,7 +540,7 @@ config MTD_NAND_FSMC Flexible Static Memory Controller (FSMC) config MTD_NAND_XWAY - tristate "Support for NAND on Lantiq XWAY SoC" + bool "Support for NAND on Lantiq XWAY SoC" depends on LANTIQ && SOC_TYPE_XWAY help Enables support for NAND Flash chips on Lantiq XWAY SoCs. NAND is attached From a2724663494f7313f53da10d8c0a729c5e3c4dea Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 5 Dec 2016 22:14:37 +0100 Subject: [PATCH 034/258] mtd: nand: xway: fix build because of module functions Remove the usage of modules functions to make this driver compile again. Otherwise an include of linux/modules.h would be needed. Fixes: 024366750c2e ("mtd: nand: xway: convert to normal platform driver") Cc: Signed-off-by: Hauke Mehrtens Signed-off-by: Boris Brezillon --- drivers/mtd/nand/xway_nand.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/mtd/nand/xway_nand.c b/drivers/mtd/nand/xway_nand.c index 1f2948c0c458..895101a5e686 100644 --- a/drivers/mtd/nand/xway_nand.c +++ b/drivers/mtd/nand/xway_nand.c @@ -232,7 +232,6 @@ static const struct of_device_id xway_nand_match[] = { { .compatible = "lantiq,nand-xway" }, {}, }; -MODULE_DEVICE_TABLE(of, xway_nand_match); static struct platform_driver xway_nand_driver = { .probe = xway_nand_probe, @@ -243,6 +242,4 @@ static struct platform_driver xway_nand_driver = { }, }; -module_platform_driver(xway_nand_driver); - -MODULE_LICENSE("GPL"); +builtin_platform_driver(xway_nand_driver); From f0fcdc506b76e924c60fa607bba5872ca4745476 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 1 Jan 2017 18:58:27 -0800 Subject: [PATCH 035/258] mtd: nand: oxnas_nand: fix build errors on arch/um, require HAS_IOMEM Fix build errors on arch/um, which does not support HAS_IOMEM, while the oxnas_nand.c driver uses interfaces that are supplied by HAS_IOMEM. (loadable module build:) ERROR: "devm_ioremap_resource" [drivers/mtd/nand/oxnas_nand.ko] undefined! or (built-in build:) drivers/built-in.o: In function `oxnas_nand_probe': drivers/mtd/nand/oxnas_nand.c:102: undefined reference to `devm_ioremap_resource' Fixes: 668592492409 ("mtd: nand: Add OX820 NAND Support") Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Acked-by: Neil Armstrong Signed-off-by: Boris Brezillon --- drivers/mtd/nand/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 2f35ab5426f8..9ce5dcb4abd0 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -426,6 +426,7 @@ config MTD_NAND_ORION config MTD_NAND_OXNAS tristate "NAND Flash support for Oxford Semiconductor SoC" + depends on HAS_IOMEM help This enables the NAND flash controller on Oxford Semiconductor SoCs. From 1f2ed153b916c95a49a1ca9d7107738664224b7f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Jan 2017 00:20:49 +0900 Subject: [PATCH 036/258] perf probe: Fix to get correct modname from elf header Since 'perf probe' supports cross-arch probes, it is possible to analyze different arch kernel image which has different bits-per-long. In that case, it fails to get the module name because it uses the MOD_NAME_OFFSET macro based on the host machine bits-per-long, instead of the target arch bits-per-long. This fixes above issue by changing modname-offset based on the target archs bit width. This is ok because linux kernel uses LP64 model on 64bit arch. E.g. without this (on x86_64, and target module is arm32): $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup p:probe/configfs_lookup :configfs_lookup+0 ^-Here is an empty module name. With this fix, you can see correct module name: $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup p:probe/configfs_lookup configfs:configfs_lookup+0 Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148337043836.6752.383495516397005695.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d281ae2b54e8..8f810961ec78 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -267,21 +267,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address) return true; } -/* - * NOTE: - * '.gnu.linkonce.this_module' section of kernel module elf directly - * maps to 'struct module' from linux/module.h. This section contains - * actual module name which will be used by kernel after loading it. - * But, we cannot use 'struct module' here since linux/module.h is not - * exposed to user-space. Offset of 'name' has remained same from long - * time, so hardcoding it here. - */ -#ifdef __LP64__ -#define MOD_NAME_OFFSET 24 -#else -#define MOD_NAME_OFFSET 12 -#endif - /* * @module can be module name of module file path. In case of path, * inspect elf and find out what is actual module name. @@ -296,6 +281,7 @@ static char *find_module_name(const char *module) Elf_Data *data; Elf_Scn *sec; char *mod_name = NULL; + int name_offset; fd = open(module, O_RDONLY); if (fd < 0) @@ -317,7 +303,21 @@ static char *find_module_name(const char *module) if (!data || !data->d_buf) goto ret_err; - mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET); + /* + * NOTE: + * '.gnu.linkonce.this_module' section of kernel module elf directly + * maps to 'struct module' from linux/module.h. This section contains + * actual module name which will be used by kernel after loading it. + * But, we cannot use 'struct module' here since linux/module.h is not + * exposed to user-space. Offset of 'name' has remained same from long + * time, so hardcoding it here. + */ + if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) + name_offset = 12; + else /* expect ELFCLASS64 by default */ + name_offset = 24; + + mod_name = strdup((char *)data->d_buf + name_offset); ret_err: elf_end(elf); From 836c3ce2566fb8c1754f8d7c9534cad9bc8a6879 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 2 Jan 2017 12:07:37 +0200 Subject: [PATCH 037/258] dmaengine: omap-dma: Fix dynamic lch_map allocation The original patch did not done what it was supposed to be doing and even worst it broke legacy boot (OMAP1). The lch_map size should be the number of available logical channels in sDMA and the od->dma_requests should store the number of available DMA request lines usable in sDMA. In legacy mode we do not have a way to get the DMA request count, in that case we use OMAP_SDMA_REQUESTS (127), despite the fact that OMAP1510 have only 31 DMA request line. Fixes: 2d1a9a946fae ("dmaengine: omap-dma: Dynamically allocate memory for lch_map") Reported-by: Aaro Koskinen Cc: stable@vger.kernel.org # v4.9 Signed-off-by: Peter Ujfalusi Tested-by: Aaro Koskinen Signed-off-by: Vinod Koul --- drivers/dma/omap-dma.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index ac68666cd3f4..4ad101a47e0a 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -1452,6 +1452,7 @@ static int omap_dma_probe(struct platform_device *pdev) struct omap_dmadev *od; struct resource *res; int rc, i, irq; + u32 lch_count; od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL); if (!od) @@ -1494,20 +1495,31 @@ static int omap_dma_probe(struct platform_device *pdev) spin_lock_init(&od->lock); spin_lock_init(&od->irq_lock); - if (!pdev->dev.of_node) { - od->dma_requests = od->plat->dma_attr->lch_count; - if (unlikely(!od->dma_requests)) - od->dma_requests = OMAP_SDMA_REQUESTS; - } else if (of_property_read_u32(pdev->dev.of_node, "dma-requests", - &od->dma_requests)) { + /* Number of DMA requests */ + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { dev_info(&pdev->dev, "Missing dma-requests property, using %u.\n", OMAP_SDMA_REQUESTS); - od->dma_requests = OMAP_SDMA_REQUESTS; } - od->lch_map = devm_kcalloc(&pdev->dev, od->dma_requests, - sizeof(*od->lch_map), GFP_KERNEL); + /* Number of available logical channels */ + if (!pdev->dev.of_node) { + lch_count = od->plat->dma_attr->lch_count; + if (unlikely(!lch_count)) + lch_count = OMAP_SDMA_CHANNELS; + } else if (of_property_read_u32(pdev->dev.of_node, "dma-channels", + &lch_count)) { + dev_info(&pdev->dev, + "Missing dma-channels property, using %u.\n", + OMAP_SDMA_CHANNELS); + lch_count = OMAP_SDMA_CHANNELS; + } + + od->lch_map = devm_kcalloc(&pdev->dev, lch_count, sizeof(*od->lch_map), + GFP_KERNEL); if (!od->lch_map) return -ENOMEM; From f53243b563e8966fb5a5cd8f27d48b832d3b1c43 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 2 Jan 2017 17:42:08 +0100 Subject: [PATCH 038/258] MAINTAINERS: dmaengine: Update + Hand over the at_hdmac driver to Ludovic Hand over the Microchip / Atmel DMA driver handled by at_hdmac driver to Ludovic who is responsible for the newer at_xdmac driver as well. Also update the entry name and position to follow company changes. Signed-off-by: Nicolas Ferre Signed-off-by: Vinod Koul --- MAINTAINERS | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cfff2c9e3d94..c10150853273 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2194,14 +2194,6 @@ L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/atmel -ATMEL DMA DRIVER -M: Nicolas Ferre -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Supported -F: drivers/dma/at_hdmac.c -F: drivers/dma/at_hdmac_regs.h -F: include/linux/platform_data/dma-atmel.h - ATMEL XDMA DRIVER M: Ludovic Desroches L: linux-arm-kernel@lists.infradead.org @@ -8174,6 +8166,15 @@ S: Maintained F: drivers/tty/serial/atmel_serial.c F: include/linux/atmel_serial.h +MICROCHIP / ATMEL DMA DRIVER +M: Ludovic Desroches +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: dmaengine@vger.kernel.org +S: Supported +F: drivers/dma/at_hdmac.c +F: drivers/dma/at_hdmac_regs.h +F: include/linux/platform_data/dma-atmel.h + MICROCHIP / ATMEL ISC DRIVER M: Songjun Wu L: linux-media@vger.kernel.org From 5c9e6c2b2ba3ec3a442e2fb5b4286498f8b4dcb7 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 16 Dec 2016 11:39:11 +0100 Subject: [PATCH 039/258] dmaengine: pl330: Fix runtime PM support for terminated transfers PL330 DMA engine driver is leaking a runtime reference after any terminated DMA transactions. This patch fixes this issue by tracking runtime PM state of the device and making additional call to pm_runtime_put() in terminate_all callback if needed. Fixes: ae43b3289186 ("ARM: 8202/1: dmaengine: pl330: Add runtime Power Management support v12") Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 87fd01539fcb..740bbb942594 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -448,6 +448,9 @@ struct dma_pl330_chan { /* for cyclic capability */ bool cyclic; + + /* for runtime pm tracking */ + bool active; }; struct pl330_dmac { @@ -2033,6 +2036,7 @@ static void pl330_tasklet(unsigned long data) _stop(pch->thread); spin_unlock(&pch->thread->dmac->lock); power_down = true; + pch->active = false; } else { /* Make sure the PL330 Channel thread is active */ spin_lock(&pch->thread->dmac->lock); @@ -2052,6 +2056,7 @@ static void pl330_tasklet(unsigned long data) desc->status = PREP; list_move_tail(&desc->node, &pch->work_list); if (power_down) { + pch->active = true; spin_lock(&pch->thread->dmac->lock); _start(pch->thread); spin_unlock(&pch->thread->dmac->lock); @@ -2166,6 +2171,7 @@ static int pl330_terminate_all(struct dma_chan *chan) unsigned long flags; struct pl330_dmac *pl330 = pch->dmac; LIST_HEAD(list); + bool power_down = false; pm_runtime_get_sync(pl330->ddma.dev); spin_lock_irqsave(&pch->lock, flags); @@ -2176,6 +2182,8 @@ static int pl330_terminate_all(struct dma_chan *chan) pch->thread->req[0].desc = NULL; pch->thread->req[1].desc = NULL; pch->thread->req_running = -1; + power_down = pch->active; + pch->active = false; /* Mark all desc done */ list_for_each_entry(desc, &pch->submitted_list, node) { @@ -2193,6 +2201,8 @@ static int pl330_terminate_all(struct dma_chan *chan) list_splice_tail_init(&pch->completed_list, &pl330->desc_pool); spin_unlock_irqrestore(&pch->lock, flags); pm_runtime_mark_last_busy(pl330->ddma.dev); + if (power_down) + pm_runtime_put_autosuspend(pl330->ddma.dev); pm_runtime_put_autosuspend(pl330->ddma.dev); return 0; @@ -2357,6 +2367,7 @@ static void pl330_issue_pending(struct dma_chan *chan) * updated on work_list emptiness status. */ WARN_ON(list_empty(&pch->submitted_list)); + pch->active = true; pm_runtime_get_sync(pch->dmac->ddma.dev); } list_splice_tail_init(&pch->submitted_list, &pch->work_list); From 7165b8ad36f8bda42a5a8aa059b9a5071acc2210 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Mon, 19 Dec 2016 15:30:12 +0100 Subject: [PATCH 040/258] mtd: nand: tango: Update DT binding description Visually separate register ranges (address/size pairs) in reg prop. Change DMA channel name, for consistency with other drivers. Signed-off-by: Marc Gonzalez Signed-off-by: Boris Brezillon --- Documentation/devicetree/bindings/mtd/tango-nand.txt | 6 +++--- drivers/mtd/nand/tango_nand.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/mtd/tango-nand.txt b/Documentation/devicetree/bindings/mtd/tango-nand.txt index ad5a02f2ac8c..cd1bf2ac9055 100644 --- a/Documentation/devicetree/bindings/mtd/tango-nand.txt +++ b/Documentation/devicetree/bindings/mtd/tango-nand.txt @@ -5,7 +5,7 @@ Required properties: - compatible: "sigma,smp8758-nand" - reg: address/size of nfc_reg, nfc_mem, and pbus_reg - dmas: reference to the DMA channel used by the controller -- dma-names: "nfc_sbox" +- dma-names: "rxtx" - clocks: reference to the system clock - #address-cells: <1> - #size-cells: <0> @@ -17,9 +17,9 @@ Example: nandc: nand-controller@2c000 { compatible = "sigma,smp8758-nand"; - reg = <0x2c000 0x30 0x2d000 0x800 0x20000 0x1000>; + reg = <0x2c000 0x30>, <0x2d000 0x800>, <0x20000 0x1000>; dmas = <&dma0 3>; - dma-names = "nfc_sbox"; + dma-names = "rxtx"; clocks = <&clkgen SYS_CLK>; #address-cells = <1>; #size-cells = <0>; diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 28c7f474be77..78a65f672c87 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -636,7 +636,7 @@ static int tango_nand_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); - nfc->chan = dma_request_chan(&pdev->dev, "nfc_sbox"); + nfc->chan = dma_request_chan(&pdev->dev, "rxtx"); if (IS_ERR(nfc->chan)) return PTR_ERR(nfc->chan); From 8043d25b3c0fa0a8f531333707f682f03b6febdb Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 3 Jan 2017 11:01:14 +0100 Subject: [PATCH 041/258] mtd: nand: tango: Reset pbus to raw mode in probe Linux should not expect the boot loader to properly configure the peripheral bus "pad mode", so reset PBUS_PAD_MODE to raw. Signed-off-by: Marc Gonzalez Signed-off-by: Boris Brezillon --- drivers/mtd/nand/tango_nand.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 78a65f672c87..4a5e948c62df 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -632,6 +632,8 @@ static int tango_nand_probe(struct platform_device *pdev) if (IS_ERR(nfc->pbus_base)) return PTR_ERR(nfc->pbus_base); + writel_relaxed(MODE_RAW, nfc->pbus_base + PBUS_PAD_MODE); + clk = clk_get(&pdev->dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); From b66fb1da5a8cac3f5c3cdbe41937c91efc4e76a4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Jan 2017 09:19:54 +0100 Subject: [PATCH 042/258] tools lib subcmd: Add OPT_STRING_OPTARG_SET option To allow string options with a default argument and variable set when the option is used. Signed-off-by: Jiri Olsa Tested-by: Wang Nan Cc: David Ahern Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483431600-19887-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/parse-options.c | 3 +++ tools/lib/subcmd/parse-options.h | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 3284bb14ae78..8aad81151d50 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p, else err = get_arg(p, opt, flags, (const char **)opt->value); + if (opt->set) + *(bool *)opt->set = true; + /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ if (opt->flags & PARSE_OPT_NOEMPTY) { const char *val = *(const char **)opt->value; diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 8866ac438b34..11c3be3bcce7 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -137,6 +137,11 @@ struct option { { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ .value = check_vtype(v, const char **), (a), .help = (h), \ .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) } +#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \ + { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, const char **), (a), .help = (h), \ + .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \ + .set = check_vtype(os, bool *)} #define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} #define OPT_DATE(s, l, v, h) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } From efd21307119d5a23ac83ae8fd5a39a5c7aeb8493 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Jan 2017 09:19:55 +0100 Subject: [PATCH 043/258] perf record: Make __record_options static There's no need for this one to be global. Signed-off-by: Jiri Olsa Tested-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483431600-19887-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 74d6a035133a..31cf0ce12a65 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1405,7 +1405,7 @@ static bool dry_run; * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', * using pipes, etc. */ -struct option __record_options[] = { +static struct option __record_options[] = { OPT_CALLBACK('e', "event", &record.evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option), From 60437ac02f398e0ee0927748d4798dd5534ac90d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Jan 2017 09:19:56 +0100 Subject: [PATCH 044/258] perf record: Fix --switch-output documentation and comment There's no --signal-trigger option, also adding the code comment into record man page. Signed-off-by: Jiri Olsa Tested-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483431600-19887-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 ++++ tools/perf/builtin-record.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 27fc3617c6a4..5054d9147f0f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that particular perf.data snapshot should be kept or not. Implies --timestamp-filename, --no-buildid and --no-buildid-cache. +The reason for the latter two is to reduce the data file switching +overhead. You can still switch them on with: + + --switch-output --no-no-buildid --no-no-buildid-cache --dry-run:: Parse options then exit. --dry-run can be used to detect errors in cmdline diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 31cf0ce12a65..4ec10e9427d9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) * overhead. Still generate buildid if they are required * explicitly using * - * perf record --signal-trigger --no-no-buildid \ + * perf record --switch-output --no-no-buildid \ * --no-no-buildid-cache * * Following code equals to: From aa7c8da35d1905d80e840d075f07d26ec90144b5 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 20 Dec 2016 13:28:27 -0500 Subject: [PATCH 045/258] btrfs: fix error handling when run_delayed_extent_op fails In __btrfs_run_delayed_refs, the error path when run_delayed_extent_op fails sets locked_ref->processing = 0 but doesn't re-increment delayed_refs->num_heads_ready. As a result, we end up triggering the WARN_ON in btrfs_select_ref_head. Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads) Reported-by: Jon Nelson Signed-off-by: Jeff Mahoney Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e97302f437a1..5366e50c84c6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2572,7 +2572,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ if (must_insert_reserved) locked_ref->must_insert_reserved = 1; + spin_lock(&delayed_refs->lock); locked_ref->processing = 0; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); From d0280996437081dd12ed1e982ac8aeaa62835ec4 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 20 Dec 2016 13:28:28 -0500 Subject: [PATCH 046/258] btrfs: fix locking when we put back a delayed ref that's too new In __btrfs_run_delayed_refs, when we put back a delayed ref that's too new, we have already dropped the lock on locked_ref when we set ->processing = 0. This patch keeps the lock to cover that assignment. Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads) Signed-off-by: Jeff Mahoney Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5366e50c84c6..ac7e6713033c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2522,11 +2522,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { spin_unlock(&locked_ref->lock); - btrfs_delayed_ref_unlock(locked_ref); spin_lock(&delayed_refs->lock); locked_ref->processing = 0; delayed_refs->num_heads_ready++; spin_unlock(&delayed_refs->lock); + btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; cond_resched(); count++; From e321f8a801d7b4c40da8005257b05b9c2b51b072 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 30 Nov 2016 16:11:04 -0800 Subject: [PATCH 047/258] Btrfs: use down_read_nested to make lockdep silent If @block_group is not @used_bg, it'll try to get @used_bg's lock without droping @block_group 's lock and lockdep has throwed a scary deadlock warning about it. Fix it by using down_read_nested. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ac7e6713033c..dcd2e798767e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7387,7 +7387,8 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, spin_unlock(&cluster->refill_lock); - down_read(&used_bg->data_rwsem); + /* We should only have one-level nested. */ + down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING); spin_lock(&cluster->refill_lock); if (used_bg == cluster->block_group) From 781feef7e6befafd4d9787d1f7ada1f9ccd504e4 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 30 Nov 2016 16:20:25 -0800 Subject: [PATCH 048/258] Btrfs: fix lockdep warning about log_mutex While checking INODE_REF/INODE_EXTREF for a corner case, we may acquire a different inode's log_mutex with holding the current inode's log_mutex, and lockdep has complained this with a possilble deadlock warning. Fix this by using mutex_lock_nested() when processing the other inode's log_mutex. Reviewed-by: Filipe Manana Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f10bf5213ed8..eeffff84f280 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -37,6 +37,7 @@ */ #define LOG_INODE_ALL 0 #define LOG_INODE_EXISTS 1 +#define LOG_OTHER_INODE 2 /* * directory trouble cases @@ -4641,7 +4642,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (S_ISDIR(inode->i_mode) || (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags) && - inode_only == LOG_INODE_EXISTS)) + inode_only >= LOG_INODE_EXISTS)) max_key.type = BTRFS_XATTR_ITEM_KEY; else max_key.type = (u8)-1; @@ -4665,7 +4666,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, return ret; } - mutex_lock(&BTRFS_I(inode)->log_mutex); + if (inode_only == LOG_OTHER_INODE) { + inode_only = LOG_INODE_EXISTS; + mutex_lock_nested(&BTRFS_I(inode)->log_mutex, + SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&BTRFS_I(inode)->log_mutex); + } /* * a brute force approach to making sure we get the most uptodate @@ -4817,7 +4824,7 @@ again: * unpin it. */ err = btrfs_log_inode(trans, root, other_inode, - LOG_INODE_EXISTS, + LOG_OTHER_INODE, 0, LLONG_MAX, ctx); iput(other_inode); if (err) From c2931667c83ded6504b3857e99cc45b21fa496fb Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 22 Dec 2016 17:13:54 -0800 Subject: [PATCH 049/258] Btrfs: adjust outstanding_extents counter properly when dio write is split Currently how btrfs dio deals with split dio write is not good enough if dio write is split into several segments due to the lack of contiguous space, a large dio write like 'dd bs=1G count=1' can end up with incorrect outstanding_extents counter and endio would complain loudly with an assertion. This fixes the problem by compensating the outstanding_extents counter in inode if a large dio write gets split. Reported-by: Anand Jain Tested-by: Anand Jain Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a713d9d324b0..81b9d9d0450c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7623,11 +7623,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode, * within our reservation, otherwise we need to adjust our inode * counter appropriately. */ - if (dio_data->outstanding_extents) { + if (dio_data->outstanding_extents >= num_extents) { dio_data->outstanding_extents -= num_extents; } else { + /* + * If dio write length has been split due to no large enough + * contiguous space, we need to compensate our inode counter + * appropriately. + */ + u64 num_needed = num_extents - dio_data->outstanding_extents; + spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents += num_extents; + BTRFS_I(inode)->outstanding_extents += num_needed; spin_unlock(&BTRFS_I(inode)->lock); } } From 3b046a97cbd35a73e1eef968dbfb1a0aac745a77 Mon Sep 17 00:00:00 2001 From: Robert LeBlanc Date: Mon, 5 Dec 2016 13:02:57 -0700 Subject: [PATCH 050/258] md/raid1: Refactor raid1_make_request Refactor raid1_make_request to make read and write code in their own functions to clean up the code. Signed-off-by: Robert LeBlanc Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 267 +++++++++++++++++++++++---------------------- 1 file changed, 139 insertions(+), 128 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a1f3fbed9100..14422407e520 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1066,17 +1066,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) kfree(plug); } -static void raid1_make_request(struct mddev *mddev, struct bio * bio) +static void raid1_read_request(struct mddev *mddev, struct bio *bio, + struct r1bio *r1_bio) { struct r1conf *conf = mddev->private; struct raid1_info *mirror; - struct r1bio *r1_bio; struct bio *read_bio; + struct bitmap *bitmap = mddev->bitmap; + const int op = bio_op(bio); + const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); + int sectors_handled; + int max_sectors; + int rdisk; + + wait_barrier(conf, bio); + +read_again: + rdisk = read_balance(conf, r1_bio, &max_sectors); + + if (rdisk < 0) { + /* couldn't find anywhere to read from */ + raid_end_bio_io(r1_bio); + return; + } + mirror = conf->mirrors + rdisk; + + if (test_bit(WriteMostly, &mirror->rdev->flags) && + bitmap) { + /* + * Reading from a write-mostly device must take care not to + * over-take any writes that are 'behind' + */ + raid1_log(mddev, "wait behind writes"); + wait_event(bitmap->behind_wait, + atomic_read(&bitmap->behind_writes) == 0); + } + r1_bio->read_disk = rdisk; + r1_bio->start_next_window = 0; + + read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); + bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector, + max_sectors); + + r1_bio->bios[rdisk] = read_bio; + + read_bio->bi_iter.bi_sector = r1_bio->sector + + mirror->rdev->data_offset; + read_bio->bi_bdev = mirror->rdev->bdev; + read_bio->bi_end_io = raid1_end_read_request; + bio_set_op_attrs(read_bio, op, do_sync); + if (test_bit(FailFast, &mirror->rdev->flags) && + test_bit(R1BIO_FailFast, &r1_bio->state)) + read_bio->bi_opf |= MD_FAILFAST; + read_bio->bi_private = r1_bio; + + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), + read_bio, disk_devt(mddev->gendisk), + r1_bio->sector); + + if (max_sectors < r1_bio->sectors) { + /* + * could not read all from this device, so we will need another + * r1_bio. + */ + sectors_handled = (r1_bio->sector + max_sectors + - bio->bi_iter.bi_sector); + r1_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + + /* + * Cannot call generic_make_request directly as that will be + * queued in __make_request and subsequent mempool_alloc might + * block waiting for it. So hand bio over to raid1d. + */ + reschedule_retry(r1_bio); + + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + + r1_bio->master_bio = bio; + r1_bio->sectors = bio_sectors(bio) - sectors_handled; + r1_bio->state = 0; + r1_bio->mddev = mddev; + r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled; + goto read_again; + } else + generic_make_request(read_bio); +} + +static void raid1_write_request(struct mddev *mddev, struct bio *bio, + struct r1bio *r1_bio) +{ + struct r1conf *conf = mddev->private; int i, disks; - struct bitmap *bitmap; + struct bitmap *bitmap = mddev->bitmap; unsigned long flags; const int op = bio_op(bio); - const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); const unsigned long do_flush_fua = (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)); @@ -1096,15 +1186,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) md_write_start(mddev, bio); /* wait on superblock update early */ - if (bio_data_dir(bio) == WRITE && - ((bio_end_sector(bio) > mddev->suspend_lo && + if ((bio_end_sector(bio) > mddev->suspend_lo && bio->bi_iter.bi_sector < mddev->suspend_hi) || (mddev_is_clustered(mddev) && md_cluster_ops->area_resyncing(mddev, WRITE, - bio->bi_iter.bi_sector, bio_end_sector(bio))))) { - /* As the suspend_* range is controlled by - * userspace, we want an interruptible - * wait. + bio->bi_iter.bi_sector, bio_end_sector(bio)))) { + + /* + * As the suspend_* range is controlled by userspace, we want + * an interruptible wait. */ DEFINE_WAIT(w); for (;;) { @@ -1115,128 +1205,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) bio->bi_iter.bi_sector >= mddev->suspend_hi || (mddev_is_clustered(mddev) && !md_cluster_ops->area_resyncing(mddev, WRITE, - bio->bi_iter.bi_sector, bio_end_sector(bio)))) + bio->bi_iter.bi_sector, + bio_end_sector(bio)))) break; schedule(); } finish_wait(&conf->wait_barrier, &w); } - start_next_window = wait_barrier(conf, bio); - bitmap = mddev->bitmap; - - /* - * make_request() can abort the operation when read-ahead is being - * used and no empty request is available. - * - */ - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); - - r1_bio->master_bio = bio; - r1_bio->sectors = bio_sectors(bio); - r1_bio->state = 0; - r1_bio->mddev = mddev; - r1_bio->sector = bio->bi_iter.bi_sector; - - /* We might need to issue multiple reads to different - * devices if there are bad blocks around, so we keep - * track of the number of reads in bio->bi_phys_segments. - * If this is 0, there is only one r1_bio and no locking - * will be needed when requests complete. If it is - * non-zero, then it is the number of not-completed requests. - */ - bio->bi_phys_segments = 0; - bio_clear_flag(bio, BIO_SEG_VALID); - - if (rw == READ) { - /* - * read balancing logic: - */ - int rdisk; - -read_again: - rdisk = read_balance(conf, r1_bio, &max_sectors); - - if (rdisk < 0) { - /* couldn't find anywhere to read from */ - raid_end_bio_io(r1_bio); - return; - } - mirror = conf->mirrors + rdisk; - - if (test_bit(WriteMostly, &mirror->rdev->flags) && - bitmap) { - /* Reading from a write-mostly device must - * take care not to over-take any writes - * that are 'behind' - */ - raid1_log(mddev, "wait behind writes"); - wait_event(bitmap->behind_wait, - atomic_read(&bitmap->behind_writes) == 0); - } - r1_bio->read_disk = rdisk; - r1_bio->start_next_window = 0; - - read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); - bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector, - max_sectors); - - r1_bio->bios[rdisk] = read_bio; - - read_bio->bi_iter.bi_sector = r1_bio->sector + - mirror->rdev->data_offset; - read_bio->bi_bdev = mirror->rdev->bdev; - read_bio->bi_end_io = raid1_end_read_request; - bio_set_op_attrs(read_bio, op, do_sync); - if (test_bit(FailFast, &mirror->rdev->flags) && - test_bit(R1BIO_FailFast, &r1_bio->state)) - read_bio->bi_opf |= MD_FAILFAST; - read_bio->bi_private = r1_bio; - - if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), - read_bio, disk_devt(mddev->gendisk), - r1_bio->sector); - - if (max_sectors < r1_bio->sectors) { - /* could not read all from this device, so we will - * need another r1_bio. - */ - - sectors_handled = (r1_bio->sector + max_sectors - - bio->bi_iter.bi_sector); - r1_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (bio->bi_phys_segments == 0) - bio->bi_phys_segments = 2; - else - bio->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - /* Cannot call generic_make_request directly - * as that will be queued in __make_request - * and subsequent mempool_alloc might block waiting - * for it. So hand bio over to raid1d. - */ - reschedule_retry(r1_bio); - - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); - - r1_bio->master_bio = bio; - r1_bio->sectors = bio_sectors(bio) - sectors_handled; - r1_bio->state = 0; - r1_bio->mddev = mddev; - r1_bio->sector = bio->bi_iter.bi_sector + - sectors_handled; - goto read_again; - } else - generic_make_request(read_bio); - return; - } - - /* - * WRITE: - */ if (conf->pending_count >= max_queued_requests) { md_wakeup_thread(mddev->thread); raid1_log(mddev, "wait queued"); @@ -1280,8 +1257,7 @@ read_again: int bad_sectors; int is_bad; - is_bad = is_badblock(rdev, r1_bio->sector, - max_sectors, + is_bad = is_badblock(rdev, r1_bio->sector, max_sectors, &first_bad, &bad_sectors); if (is_bad < 0) { /* mustn't write here until the bad block is @@ -1370,7 +1346,8 @@ read_again: continue; mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); - bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors); + bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, + max_sectors); if (first_clone) { /* do behind I/O ? @@ -1464,6 +1441,40 @@ read_again: wake_up(&conf->wait_barrier); } +static void raid1_make_request(struct mddev *mddev, struct bio *bio) +{ + struct r1conf *conf = mddev->private; + struct r1bio *r1_bio; + + /* + * make_request() can abort the operation when read-ahead is being + * used and no empty request is available. + * + */ + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + + r1_bio->master_bio = bio; + r1_bio->sectors = bio_sectors(bio); + r1_bio->state = 0; + r1_bio->mddev = mddev; + r1_bio->sector = bio->bi_iter.bi_sector; + + /* + * We might need to issue multiple reads to different devices if there + * are bad blocks around, so we keep track of the number of reads in + * bio->bi_phys_segments. If this is 0, there is only one r1_bio and + * no locking will be needed when requests complete. If it is + * non-zero, then it is the number of not-completed requests. + */ + bio->bi_phys_segments = 0; + bio_clear_flag(bio, BIO_SEG_VALID); + + if (bio_data_dir(bio) == READ) + raid1_read_request(mddev, bio, r1_bio); + else + raid1_write_request(mddev, bio, r1_bio); +} + static void raid1_status(struct seq_file *seq, struct mddev *mddev) { struct r1conf *conf = mddev->private; From bb5f1ed70bc3bbbce510907da3432dab267ff508 Mon Sep 17 00:00:00 2001 From: Robert LeBlanc Date: Mon, 5 Dec 2016 13:02:58 -0700 Subject: [PATCH 051/258] md/raid10: Refactor raid10_make_request Refactor raid10_make_request into seperate read and write functions to clean up the code. Shaohua: add the recovery check back to read path Signed-off-by: Robert LeBlanc Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 245 +++++++++++++++++++++++++------------------- 1 file changed, 140 insertions(+), 105 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ab5e86209322..1920756828df 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) kfree(plug); } -static void __make_request(struct mddev *mddev, struct bio *bio) +static void raid10_read_request(struct mddev *mddev, struct bio *bio, + struct r10bio *r10_bio) { struct r10conf *conf = mddev->private; - struct r10bio *r10_bio; struct bio *read_bio; + const int op = bio_op(bio); + const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); + int sectors_handled; + int max_sectors; + sector_t sectors; + struct md_rdev *rdev; + int slot; + + /* + * Register the new request and wait if the reconstruction + * thread has put up a bar for new requests. + * Continue immediately if no resync is active currently. + */ + wait_barrier(conf); + + sectors = bio_sectors(bio); + while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + bio->bi_iter.bi_sector < conf->reshape_progress && + bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { + /* + * IO spans the reshape position. Need to wait for reshape to + * pass + */ + raid10_log(conf->mddev, "wait reshape"); + allow_barrier(conf); + wait_event(conf->wait_barrier, + conf->reshape_progress <= bio->bi_iter.bi_sector || + conf->reshape_progress >= bio->bi_iter.bi_sector + + sectors); + wait_barrier(conf); + } + +read_again: + rdev = read_balance(conf, r10_bio, &max_sectors); + if (!rdev) { + raid_end_bio_io(r10_bio); + return; + } + slot = r10_bio->read_slot; + + read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); + bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector, + max_sectors); + + r10_bio->devs[slot].bio = read_bio; + r10_bio->devs[slot].rdev = rdev; + + read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + + choose_data_offset(r10_bio, rdev); + read_bio->bi_bdev = rdev->bdev; + read_bio->bi_end_io = raid10_end_read_request; + bio_set_op_attrs(read_bio, op, do_sync); + if (test_bit(FailFast, &rdev->flags) && + test_bit(R10BIO_FailFast, &r10_bio->state)) + read_bio->bi_opf |= MD_FAILFAST; + read_bio->bi_private = r10_bio; + + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), + read_bio, disk_devt(mddev->gendisk), + r10_bio->sector); + if (max_sectors < r10_bio->sectors) { + /* + * Could not read all from this device, so we will need another + * r10_bio. + */ + sectors_handled = (r10_bio->sector + max_sectors + - bio->bi_iter.bi_sector); + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + /* + * Cannot call generic_make_request directly as that will be + * queued in __generic_make_request and subsequent + * mempool_alloc might block waiting for it. so hand bio over + * to raid10d. + */ + reschedule_retry(r10_bio); + + r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); + + r10_bio->master_bio = bio; + r10_bio->sectors = bio_sectors(bio) - sectors_handled; + r10_bio->state = 0; + r10_bio->mddev = mddev; + r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled; + goto read_again; + } else + generic_make_request(read_bio); + return; +} + +static void raid10_write_request(struct mddev *mddev, struct bio *bio, + struct r10bio *r10_bio) +{ + struct r10conf *conf = mddev->private; int i; const int op = bio_op(bio); - const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); const unsigned long do_fua = (bio->bi_opf & REQ_FUA); unsigned long flags; struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; struct raid10_plug_cb *plug = NULL; + sector_t sectors; int sectors_handled; int max_sectors; - int sectors; md_write_start(mddev, bio); @@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio) while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && bio->bi_iter.bi_sector < conf->reshape_progress && bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { - /* IO spans the reshape position. Need to wait for - * reshape to pass + /* + * IO spans the reshape position. Need to wait for reshape to + * pass */ raid10_log(conf->mddev, "wait reshape"); allow_barrier(conf); @@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio) sectors); wait_barrier(conf); } + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && - bio_data_dir(bio) == WRITE && (mddev->reshape_backwards ? (bio->bi_iter.bi_sector < conf->reshape_safe && bio->bi_iter.bi_sector + sectors > conf->reshape_progress) @@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio) conf->reshape_safe = mddev->reshape_position; } - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - - r10_bio->master_bio = bio; - r10_bio->sectors = sectors; - - r10_bio->mddev = mddev; - r10_bio->sector = bio->bi_iter.bi_sector; - r10_bio->state = 0; - - /* We might need to issue multiple reads to different - * devices if there are bad blocks around, so we keep - * track of the number of reads in bio->bi_phys_segments. - * If this is 0, there is only one r10_bio and no locking - * will be needed when the request completes. If it is - * non-zero, then it is the number of not-completed requests. - */ - bio->bi_phys_segments = 0; - bio_clear_flag(bio, BIO_SEG_VALID); - - if (rw == READ) { - /* - * read balancing logic: - */ - struct md_rdev *rdev; - int slot; - -read_again: - rdev = read_balance(conf, r10_bio, &max_sectors); - if (!rdev) { - raid_end_bio_io(r10_bio); - return; - } - slot = r10_bio->read_slot; - - read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); - bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector, - max_sectors); - - r10_bio->devs[slot].bio = read_bio; - r10_bio->devs[slot].rdev = rdev; - - read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + - choose_data_offset(r10_bio, rdev); - read_bio->bi_bdev = rdev->bdev; - read_bio->bi_end_io = raid10_end_read_request; - bio_set_op_attrs(read_bio, op, do_sync); - if (test_bit(FailFast, &rdev->flags) && - test_bit(R10BIO_FailFast, &r10_bio->state)) - read_bio->bi_opf |= MD_FAILFAST; - read_bio->bi_private = r10_bio; - - if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), - read_bio, disk_devt(mddev->gendisk), - r10_bio->sector); - if (max_sectors < r10_bio->sectors) { - /* Could not read all from this device, so we will - * need another r10_bio. - */ - sectors_handled = (r10_bio->sector + max_sectors - - bio->bi_iter.bi_sector); - r10_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (bio->bi_phys_segments == 0) - bio->bi_phys_segments = 2; - else - bio->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - /* Cannot call generic_make_request directly - * as that will be queued in __generic_make_request - * and subsequent mempool_alloc might block - * waiting for it. so hand bio over to raid10d. - */ - reschedule_retry(r10_bio); - - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - - r10_bio->master_bio = bio; - r10_bio->sectors = bio_sectors(bio) - sectors_handled; - r10_bio->state = 0; - r10_bio->mddev = mddev; - r10_bio->sector = bio->bi_iter.bi_sector + - sectors_handled; - goto read_again; - } else - generic_make_request(read_bio); - return; - } - - /* - * WRITE: - */ if (conf->pending_count >= max_queued_requests) { md_wakeup_thread(mddev->thread); raid10_log(mddev, "wait queued"); @@ -1300,8 +1308,7 @@ retry_write: int bad_sectors; int is_bad; - is_bad = is_badblock(rdev, dev_sector, - max_sectors, + is_bad = is_badblock(rdev, dev_sector, max_sectors, &first_bad, &bad_sectors); if (is_bad < 0) { /* Mustn't write here until the bad block @@ -1405,8 +1412,7 @@ retry_write: r10_bio->devs[i].bio = mbio; mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ - choose_data_offset(r10_bio, - rdev)); + choose_data_offset(r10_bio, rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; bio_set_op_attrs(mbio, op, do_sync | do_fua); @@ -1457,8 +1463,7 @@ retry_write: r10_bio->devs[i].repl_bio = mbio; mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr + - choose_data_offset( - r10_bio, rdev)); + choose_data_offset(r10_bio, rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; bio_set_op_attrs(mbio, op, do_sync | do_fua); @@ -1503,6 +1508,36 @@ retry_write: one_write_done(r10_bio); } +static void __make_request(struct mddev *mddev, struct bio *bio) +{ + struct r10conf *conf = mddev->private; + struct r10bio *r10_bio; + + r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); + + r10_bio->master_bio = bio; + r10_bio->sectors = bio_sectors(bio); + + r10_bio->mddev = mddev; + r10_bio->sector = bio->bi_iter.bi_sector; + r10_bio->state = 0; + + /* + * We might need to issue multiple reads to different devices if there + * are bad blocks around, so we keep track of the number of reads in + * bio->bi_phys_segments. If this is 0, there is only one r10_bio and + * no locking will be needed when the request completes. If it is + * non-zero, then it is the number of not-completed requests. + */ + bio->bi_phys_segments = 0; + bio_clear_flag(bio, BIO_SEG_VALID); + + if (bio_data_dir(bio) == READ) + raid10_read_request(mddev, bio, r10_bio); + else + raid10_write_request(mddev, bio, r10_bio); +} + static void raid10_make_request(struct mddev *mddev, struct bio *bio) { struct r10conf *conf = mddev->private; From 074859184d770824f4437dca716bdeb625ae8b1c Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 3 Jan 2017 12:42:42 +0100 Subject: [PATCH 052/258] tools lib traceevent: Fix prev/next_prio for deadline tasks Currently, the sched:sched_switch tracepoint reports deadline tasks with priority -1. But when reading the trace via perf script I've got the following output: # ./d & # (d is a deadline task, see [1]) # perf record -e sched:sched_switch -a sleep 1 # perf script ... swapper 0 [000] 2146.962441: sched:sched_switch: swapper/0:0 [120] R ==> d:2593 [4294967295] d 2593 [000] 2146.972472: sched:sched_switch: d:2593 [4294967295] R ==> g:2590 [4294967295] The task d reports the wrong priority [4294967295]. This happens because the "int prio" is stored in an unsigned long long val. Although it is set as a %lld, as int is shorter than unsigned long long, trace_seq_printf prints it as a positive number. The fix is just to cast the val as an int, and print it as a %d, as in the sched:sched_switch tracepoint's "format". The output with the fix is: # ./d & # perf record -e sched:sched_switch -a sleep 1 # perf script ... swapper 0 [000] 4306.374037: sched:sched_switch: swapper/0:0 [120] R ==> d:10941 [-1] d 10941 [000] 4306.383823: sched:sched_switch: d:10941 [-1] R ==> swapper/0:0 [120] [1] d.c --- #include #include #include #include #include struct sched_attr { __u32 size, sched_policy; __u64 sched_flags; __s32 sched_nice; __u32 sched_priority; __u64 sched_runtime, sched_deadline, sched_period; }; int sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) { return syscall(__NR_sched_setattr, pid, attr, flags); } int main(void) { struct sched_attr attr = { .size = sizeof(attr), .sched_policy = SCHED_DEADLINE, /* This creates a 10ms/30ms reservation */ .sched_runtime = 10 * 1000 * 1000, .sched_period = attr.sched_deadline = 30 * 1000 * 1000, }; if (sched_setattr(0, &attr, 0) < 0) { perror("sched_setattr"); return -1; } for(;;); } --- Committer notes: Got the program from the provided URL, http://bristot.me/lkml/d.c, trimmed it and included in the cset log above, so that we have everything needed to test it in one place. Signed-off-by: Daniel Bristot de Oliveira Acked-by: Steven Rostedt Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Daniel Bristot de Oliveira Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/866ef75bcebf670ae91c6a96daa63597ba981f0d.1483443552.git.bristot@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/plugin_sched_switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index f1ce60065258..ec30c2fcbac0 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld ", val); if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) - trace_seq_printf(s, "[%lld] ", val); + trace_seq_printf(s, "[%d] ", (int) val); if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0) write_state(s, val); @@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s, trace_seq_printf(s, "%lld", val); if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0) - trace_seq_printf(s, " [%lld]", val); + trace_seq_printf(s, " [%d]", (int) val); return 0; } From 30a9c6444810429aa2b7cbfbd453ce339baaadbf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Jan 2017 12:03:59 -0300 Subject: [PATCH 053/258] perf tools: Install tools/lib/traceevent plugins with install-bin Those are binaries as well, so should be installed by: make -C tools/perf install-bin' too. Cc: Alexander Shishkin Cc: Daniel Bristot de Oliveira Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-3841b37u05evxrs1igkyu6ks@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e9ec531131ca..4db68aec9913 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -704,9 +704,9 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' -install-bin: install-tools install-tests +install-bin: install-tools install-tests install-traceevent-plugins -install: install-bin try-install-man install-traceevent-plugins +install: install-bin try-install-man install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' From 7934c98a6e04028eb34c1293bfb5a6b0ab630b66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Jan 2017 15:19:21 -0300 Subject: [PATCH 054/258] perf symbols: Robustify reading of build-id from sysfs Markus reported that perf segfaults when reading /sys/kernel/notes from a kernel linked with GNU gold, due to what looks like a gold bug, so do some bounds checking to avoid crashing in that case. Reported-by: Markus Trippelsdorf Report-Link: http://lkml.kernel.org/r/20161219161821.GA294@x4 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ryhgs6a6jxvz207j2636w31c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 99400b0e8f2a..adbc6c02c3aa 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) break; } else { int n = namesz + descsz; + + if (n > (int)sizeof(bf)) { + n = sizeof(bf); + pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n", + __func__, filename, nhdr.n_namesz, nhdr.n_descsz); + } if (read(fd, bf, n) != n) break; } From 47e3a5edc6538d66e470aaed3b7c57255cb37ca1 Mon Sep 17 00:00:00 2001 From: Paul Donohue Date: Tue, 3 Jan 2017 10:39:28 -0800 Subject: [PATCH 055/258] Input: ALPS - fix TrackStick Y axis handling for SS5 hardware A minus character was lost in commit 23fce365, causing the Y axis to be inverted for SS5 TrackStick events. (Pushing the TrackStick up caused the pointer to move down, and vice versa.) Restore the lost minus. Fixes: 23fce365c6a2 ("Input: ALPS - clean up code for SS5 hardware") Signed-off-by: Paul Donohue Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index cde6f4bd8ea2..6d279aa27cb9 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -114,7 +114,7 @@ enum SS4_PACKET_ID { (_b[1] & 0x7F) \ ) -#define SS4_TS_Y_V2(_b) (s8)( \ +#define SS4_TS_Y_V2(_b) -(s8)( \ ((_b[3] & 0x01) << 7) | \ (_b[2] & 0x7F) \ ) From 01427fe7c4b956b878e55e966690624a3624e991 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2017 11:51:48 -0800 Subject: [PATCH 056/258] Input: adxl34x - make it enumerable in ACPI environment The ACPI-enabled platform may contain _DSD method to enable this driver using compatible string. Remove OF specifics to re-use existing code on ACPI-enabled platforms. Suggested-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov --- drivers/input/misc/adxl34x-i2c.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c index a8b0a2eec344..7fed92fb8cc1 100644 --- a/drivers/input/misc/adxl34x-i2c.c +++ b/drivers/input/misc/adxl34x-i2c.c @@ -136,7 +136,6 @@ static const struct i2c_device_id adxl34x_id[] = { MODULE_DEVICE_TABLE(i2c, adxl34x_id); -#ifdef CONFIG_OF static const struct of_device_id adxl34x_of_id[] = { /* * The ADXL346 is backward-compatible with the ADXL345. Differences are @@ -153,13 +152,12 @@ static const struct of_device_id adxl34x_of_id[] = { }; MODULE_DEVICE_TABLE(of, adxl34x_of_id); -#endif static struct i2c_driver adxl34x_driver = { .driver = { .name = "adxl34x", .pm = &adxl34x_i2c_pm, - .of_match_table = of_match_ptr(adxl34x_of_id), + .of_match_table = adxl34x_of_id, }, .probe = adxl34x_i2c_probe, .remove = adxl34x_i2c_remove, From eebc509b20881b92d62e317b2c073e57c5f200f0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 4 Jan 2017 12:29:05 +0900 Subject: [PATCH 057/258] perf probe: Fix --funcs to show correct symbols for offline module Fix --funcs (-F) option to show correct symbols for offline module. Since previous perf-probe uses machine__findnew_module_map() for offline module, even if user passes a module file (with full path) which is for other architecture, perf-probe always tries to load symbol map for current kernel module. This fix uses dso__new_map() to load the map from given binary as same as a map for user applications. Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148350053478.19001.15435255244512631545.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8f810961ec78..542e6472c4d7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module) /* A file path -- this is an offline module */ if (module && strchr(module, '/')) - return machine__findnew_module_map(host_machine, 0, module); + return dso__new_map(module); if (!module) module = "kernel"; @@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module) if (strncmp(pos->dso->short_name + 1, module, pos->dso->short_name_len - 2) == 0 && module[pos->dso->short_name_len - 2] == '\0') { + map__get(pos); return pos; } } @@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user) return kernel_get_module_map(target); } -static void put_target_map(struct map *map, bool user) -{ - if (map && user) { - /* Only the user map needs to be released */ - map__put(map); - } -} - - static int convert_exec_to_group(const char *exec, char **result) { char *ptr1, *ptr2, *exec_copy; @@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, } out: - put_target_map(map, uprobes); + map__put(map); return ret; } @@ -2869,7 +2861,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } out: - put_target_map(map, pev->uprobes); + map__put(map); free(syms); return ret; @@ -3362,10 +3354,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, return ret; /* Get a symbol map */ - if (user) - map = dso__new_map(target); - else - map = kernel_get_module_map(target); + map = get_target_map(target, user); if (!map) { pr_err("Failed to get a map for %s\n", (target) ? : "kernel"); return -EINVAL; @@ -3397,9 +3386,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, } end: - if (user) { - map__put(map); - } + map__put(map); exit_probe_symbol_maps(); return ret; From 8a937a25a7e3c19d5fb3f9d92f605cf5fda219d8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 4 Jan 2017 12:30:19 +0900 Subject: [PATCH 058/258] perf probe: Fix to probe on gcc generated symbols for offline kernel Fix perf-probe to show probe definition on gcc generated symbols for offline kernel (including cross-arch kernel image). gcc sometimes optimizes functions and generate new symbols with suffixes such as ".constprop.N" or ".isra.N" etc. Since those symbol names are not recorded in DWARF, we have to find correct generated symbols from offline ELF binary to probe on it (kallsyms doesn't correct it). For online kernel or uprobes we don't need it because those are rebased on _text, or a section relative address. E.g. Without this: $ perf probe -k build-arm/vmlinux -F __slab_alloc* __slab_alloc.constprop.9 $ perf probe -k build-arm/vmlinux -D __slab_alloc p:probe/__slab_alloc __slab_alloc+0 If you put above definition on target machine, it should fail because there is no __slab_alloc in kallsyms. With this fix, perf probe shows correct probe definition on __slab_alloc.constprop.9: $ perf probe -k build-arm/vmlinux -D __slab_alloc p:probe/__slab_alloc __slab_alloc.constprop.9+0 Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148350060434.19001.11864836288580083501.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 48 ++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 542e6472c4d7..4a57c8a60bd9 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -610,6 +610,51 @@ error: return ret ? : -ENOENT; } +/* + * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions + * and generate new symbols with suffixes such as .constprop.N or .isra.N + * etc. Since those symbols are not recorded in DWARF, we have to find + * correct generated symbols from offline ELF binary. + * For online kernel or uprobes we don't need this because those are + * rebased on _text, or already a section relative address. + */ +static int +post_process_offline_probe_trace_events(struct probe_trace_event *tevs, + int ntevs, const char *pathname) +{ + struct symbol *sym; + struct map *map; + unsigned long stext = 0; + u64 addr; + int i; + + /* Prepare a map for offline binary */ + map = dso__new_map(pathname); + if (!map || get_text_start_address(pathname, &stext) < 0) { + pr_warning("Failed to get ELF symbols for %s\n", pathname); + return -EINVAL; + } + + for (i = 0; i < ntevs; i++) { + addr = tevs[i].point.address + tevs[i].point.offset - stext; + sym = map__find_symbol(map, addr); + if (!sym) + continue; + if (!strcmp(sym->name, tevs[i].point.symbol)) + continue; + /* If we have no realname, use symbol for it */ + if (!tevs[i].point.realname) + tevs[i].point.realname = tevs[i].point.symbol; + else + free(tevs[i].point.symbol); + tevs[i].point.symbol = strdup(sym->name); + tevs[i].point.offset = addr - sym->start; + } + map__put(map); + + return 0; +} + static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *exec) { @@ -671,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, /* Skip post process if the target is an offline kernel */ if (symbol_conf.ignore_vmlinux_buildid) - return 0; + return post_process_offline_probe_trace_events(tevs, ntevs, + symbol_conf.vmlinux_name); reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { From 4ee437fbf626b5ad756889d8bc0fcead3d66dde7 Mon Sep 17 00:00:00 2001 From: Caleb Crome Date: Tue, 3 Jan 2017 10:22:57 -0800 Subject: [PATCH 059/258] ASoC: fsl_ssi: set fifo watermark to more reliable value The fsl_ssi fifo watermark is by default set to 2 free spaces (i.e. activate DMA on FIFO when only 2 spaces are left.) This means the DMA must service the fifo within 2 audio samples, which is just not enough time for many use cases with high data rate. In many configurations the audio channel slips (causing l/r swap in stereo configurations, or channel slipping in multi-channel configurations). This patch gives more breathing room and allows the SSI to operate reliably by changing the fifio refill watermark to 8. There is no change in behavior for older chips (with an 8-deep fifo). Only the newer chips with a 15-deep fifo get the new behavior. I suspect a new fifo depth setting could be optimized on the older chips too, but I have not tested. Signed-off-by: Caleb Crome Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 74 +++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 50349437d961..fde08660b63b 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -224,6 +224,12 @@ struct fsl_ssi_soc_data { * @dbg_stats: Debugging statistics * * @soc: SoC specific data + * + * @fifo_watermark: the FIFO watermark setting. Notifies DMA when + * there are @fifo_watermark or fewer words in TX fifo or + * @fifo_watermark or more empty words in RX fifo. + * @dma_maxburst: max number of words to transfer in one go. So far, + * this is always the same as fifo_watermark. */ struct fsl_ssi_private { struct regmap *regs; @@ -263,6 +269,9 @@ struct fsl_ssi_private { const struct fsl_ssi_soc_data *soc; struct device *dev; + + u32 fifo_watermark; + u32 dma_maxburst; }; /* @@ -1051,21 +1060,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, regmap_write(regs, CCSR_SSI_SRCR, srcr); regmap_write(regs, CCSR_SSI_SCR, scr); - /* - * Set the watermark for transmit FIFI 0 and receive FIFO 0. We don't - * use FIFO 1. We program the transmit water to signal a DMA transfer - * if there are only two (or fewer) elements left in the FIFO. Two - * elements equals one frame (left channel, right channel). This value, - * however, depends on the depth of the transmit buffer. - * - * We set the watermark on the same level as the DMA burstsize. For - * fiq it is probably better to use the biggest possible watermark - * size. - */ - if (ssi_private->use_dma) - wm = ssi_private->fifo_depth - 2; - else - wm = ssi_private->fifo_depth; + wm = ssi_private->fifo_watermark; regmap_write(regs, CCSR_SSI_SFCSR, CCSR_SSI_SFCSR_TFWM0(wm) | CCSR_SSI_SFCSR_RFWM0(wm) | @@ -1373,12 +1368,8 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, dev_dbg(&pdev->dev, "could not get baud clock: %ld\n", PTR_ERR(ssi_private->baudclk)); - /* - * We have burstsize be "fifo_depth - 2" to match the SSI - * watermark setting in fsl_ssi_startup(). - */ - ssi_private->dma_params_tx.maxburst = ssi_private->fifo_depth - 2; - ssi_private->dma_params_rx.maxburst = ssi_private->fifo_depth - 2; + ssi_private->dma_params_tx.maxburst = ssi_private->dma_maxburst; + ssi_private->dma_params_rx.maxburst = ssi_private->dma_maxburst; ssi_private->dma_params_tx.addr = ssi_private->ssi_phys + CCSR_SSI_STX0; ssi_private->dma_params_rx.addr = ssi_private->ssi_phys + CCSR_SSI_SRX0; @@ -1543,6 +1534,47 @@ static int fsl_ssi_probe(struct platform_device *pdev) /* Older 8610 DTs didn't have the fifo-depth property */ ssi_private->fifo_depth = 8; + /* + * Set the watermark for transmit FIFO 0 and receive FIFO 0. We don't + * use FIFO 1 but set the watermark appropriately nontheless. + * We program the transmit water to signal a DMA transfer + * if there are N elements left in the FIFO. For chips with 15-deep + * FIFOs, set watermark to 8. This allows the SSI to operate at a + * high data rate without channel slipping. Behavior is unchanged + * for the older chips with a fifo depth of only 8. A value of 4 + * might be appropriate for the older chips, but is left at + * fifo_depth-2 until sombody has a chance to test. + * + * We set the watermark on the same level as the DMA burstsize. For + * fiq it is probably better to use the biggest possible watermark + * size. + */ + switch (ssi_private->fifo_depth) { + case 15: + /* + * 2 samples is not enough when running at high data + * rates (like 48kHz @ 16 bits/channel, 16 channels) + * 8 seems to split things evenly and leave enough time + * for the DMA to fill the FIFO before it's over/under + * run. + */ + ssi_private->fifo_watermark = 8; + ssi_private->dma_maxburst = 8; + break; + case 8: + default: + /* + * maintain old behavior for older chips. + * Keeping it the same because I don't have an older + * board to test with. + * I suspect this could be changed to be something to + * leave some more space in the fifo. + */ + ssi_private->fifo_watermark = ssi_private->fifo_depth - 2; + ssi_private->dma_maxburst = ssi_private->fifo_depth - 2; + break; + } + dev_set_drvdata(&pdev->dev, ssi_private); if (ssi_private->soc->imx) { From cf9e1672a66c49ed8903c01b4c380a2f2dc91b40 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 5 Dec 2016 03:47:10 +0200 Subject: [PATCH 060/258] mtd: nand: lpc32xx: fix invalid error handling of a requested irq Semantics of NR_IRQS is different on machines with SPARSE_IRQ option disabled or enabled, in the latter case IRQs are allocated starting at least from the value specified by NR_IRQS and going upwards, so the check of (irq >= NR_IRQ) to decide about an error code returned by platform_get_irq() is completely invalid, don't attempt to overrule irq subsystem in the driver. The change fixes LPC32xx NAND MLC driver initialization on boot. Fixes: 8cb17b5ed017 ("irqchip: Add LPC32xx interrupt controller driver") Cc: stable@kernel.org # v4.7+ Signed-off-by: Vladimir Zapolskiy Acked-by: Sylvain Lemieux Signed-off-by: Boris Brezillon --- drivers/mtd/nand/lpc32xx_mlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c index 5553a5d9efd1..846a66c1b133 100644 --- a/drivers/mtd/nand/lpc32xx_mlc.c +++ b/drivers/mtd/nand/lpc32xx_mlc.c @@ -775,7 +775,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) init_completion(&host->comp_controller); host->irq = platform_get_irq(pdev, 0); - if ((host->irq < 0) || (host->irq >= NR_IRQS)) { + if (host->irq < 0) { dev_err(&pdev->dev, "failed to get platform irq\n"); res = -EINVAL; goto err_exit3; From dd853fd216d1485ed3045ff772079cc8689a9a4a Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Wed, 28 Dec 2016 14:55:40 +0100 Subject: [PATCH 061/258] x86/cpu: Fix bootup crashes by sanitizing the argument of the 'clearcpuid=' command-line option A negative number can be specified in the cmdline which will be used as setup_clear_cpu_cap() argument. With that we can clear/set some bit in memory predceeding boot_cpu_data/cpu_caps_cleared which may cause kernel to misbehave. This patch adds lower bound check to setup_disablecpuid(). Boris Petkov reproduced a crash: [ 1.234575] BUG: unable to handle kernel paging request at ffffffff858bd540 [ 1.236535] IP: memcpy_erms+0x6/0x10 Signed-off-by: Lukasz Odzioba Acked-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andi.kleen@intel.com Cc: bp@alien8.de Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: slaoub@gmail.com Fixes: ac72e7888a61 ("x86: add generic clearcpuid=... option") Link: http://lkml.kernel.org/r/1482933340-11857-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dc1697ca5191..9bab7a8a4293 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1221,7 +1221,7 @@ static __init int setup_disablecpuid(char *arg) { int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) + if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) setup_clear_cpu_cap(bit); else return 0; From 754c73cf4d2463022b2c9ae208026bf22564ed06 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Jan 2017 11:22:29 +0200 Subject: [PATCH 062/258] x86/cpu: Fix typo in the comment for Anniedale The proper spelling of Anniedale SoC with 'e' in the middle. Fix typo in the comment line in intel-family.h header. Signed-off-by: Andy Shevchenko Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170102092229.87036-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-family.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 34a46dc076d3..8167fdb67ae8 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -57,7 +57,7 @@ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ From 159d3726db12b3476bc59ea0ab0a702103d466b5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Jan 2017 11:24:50 +0200 Subject: [PATCH 063/258] x86/platform/intel-mid: Rename 'spidev' to 'mrfld_spidev' The current implementation supports only Intel Merrifield platforms. Don't mess with the rest of the Intel MID family by not registering device with wrong properties. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170102092450.87229-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 +- .../{platform_spidev.c => platform_mrfld_spidev.c} | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) rename arch/x86/platform/intel-mid/device_libs/{platform_spidev.c => platform_mrfld_spidev.c} (91%) diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 61b5ed2b7d40..90e4f2a6625b 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # SPI Devices -obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o +obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c similarity index 91% rename from arch/x86/platform/intel-mid/device_libs/platform_spidev.c rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c index 30c601b399ee..27186ad654c9 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c @@ -11,6 +11,7 @@ * of the License. */ +#include #include #include #include @@ -34,6 +35,9 @@ static void __init *spidev_platform_data(void *info) { struct spi_board_info *spi_info = info; + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return ERR_PTR(-ENODEV); + spi_info->mode = SPI_MODE_0; spi_info->controller_data = &spidev_spi_chip; From 74545f63890e38520eb4d1dbedcadaa9c0dbc824 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Thu, 22 Dec 2016 17:17:40 -0800 Subject: [PATCH 064/258] perf/x86: Set pmu->module in Intel PMU modules The conversion of Intel PMU drivers into modules did not include reference counting. The machine will crash when attempting to access deleted code if an event from a module PMU is started and the module removed before the event is destroyed. i.e. this crashes the machine: $ insmod intel-rapl-perf.ko $ perf stat -e power/energy-cores/ -C 0 & $ rmmod intel-rapl-perf.ko Set THIS_MODULE to pmu->module in Intel module PMUs so that generic code can handle reference counting and deny rmmod while an event still exists. Signed-off-by: David Carrillo-Cisneros Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1482455860-116269-1-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 2 ++ arch/x86/events/intel/rapl.c | 1 + arch/x86/events/intel/uncore.c | 1 + 3 files changed, 4 insertions(+) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index fec8a461bdef..1076c9a77292 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static struct pmu cstate_pkg_pmu = { @@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static const struct cstate_model nhm_cstates __initconst = { diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index bd34124449b0..17c3564d087a 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.read = rapl_pmu_event_read; + rapl_pmus->pmu.module = THIS_MODULE; return 0; } diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 97c246f84dea..8c4ccdc3a3f3 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) .start = uncore_pmu_event_start, .stop = uncore_pmu_event_stop, .read = uncore_pmu_event_read, + .module = THIS_MODULE, }; } else { pmu->pmu = *pmu->type->pmu; From 60448b077ed93d227e6c117a9e87db76ff0c1911 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Jan 2017 15:44:52 -0600 Subject: [PATCH 065/258] ASoC: Intel: bytcr-rt5640: fix settings in internal clock mode Frequency value of zero did not make sense, use same 24.576MHz setting and only change the clock source in idle mode Suggested-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index e33e4777a65c..8d2fb2d6f532 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -142,7 +142,7 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, * for Jack detection and button press */ ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_RCCLK, - 0, + 48000 * 512, SND_SOC_CLOCK_IN); if (!ret) { if ((byt_rt5640_quirk & BYT_RT5640_MCLK_EN) && priv->mclk) From 08f9572671c8047e7234cbf150869aa3c3d59a97 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 5 Jan 2017 14:25:59 +0100 Subject: [PATCH 066/258] HID: ignore Petzl USB headlamp This headlamp contains a dummy HID descriptor which pretends to be a mouse-like device, but can't be used as a mouse at all. Reported-by: Lukas Ocilka Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index cff060b56da9..ea36b557d5ee 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2496,6 +2496,7 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) }, { HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) }, + { HID_USB_DEVICE(USB_VENDOR_ID_PETZL, USB_DEVICE_ID_PETZL_HEADLAMP) }, { HID_USB_DEVICE(USB_VENDOR_ID_PHILIPS, USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE) }, { HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) }, #if IS_ENABLED(CONFIG_MOUSE_SYNAPTICS_USB) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 54bd22dc1411..f46f2c5117fa 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -816,6 +816,9 @@ #define USB_VENDOR_ID_PETALYNX 0x18b1 #define USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE 0x0037 +#define USB_VENDOR_ID_PETZL 0x2122 +#define USB_DEVICE_ID_PETZL_HEADLAMP 0x1234 + #define USB_VENDOR_ID_PHILIPS 0x0471 #define USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE 0x0617 From 28ca833ecf89c585a9543fb21aef6b2bdbbaa48a Mon Sep 17 00:00:00 2001 From: JackieLiu Date: Tue, 13 Dec 2016 13:55:27 +0800 Subject: [PATCH 067/258] md/raid5-cache: removes unnecessary write-through mode judgments The write-through mode has been returned in front of the function, do not need to do it again. Signed-off-by: JackieLiu Reviewed-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index d7bfb6fc8aef..49aea4231084 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2418,9 +2418,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf, if (do_wakeup) wake_up(&conf->wait_for_overlap); - if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) - return; - spin_lock_irq(&conf->log->stripe_in_journal_lock); list_del_init(&sh->r5c); spin_unlock_irq(&conf->log->stripe_in_journal_lock); From 3c66abbaaf69671dfd3eb9fa7740b5d7ec688231 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 14 Dec 2016 15:38:01 -0800 Subject: [PATCH 068/258] md/r5cache: simplify handling of sh->log_start in recovery We only need to update sh->log_start at the end of recovery, which is r5c_recovery_rewrite_data_only_stripes(), so it is not necessary to set it before that. In this patch, log_start is removed from r5c_recovery_alloc_stripe(). After updating all sh->log_start, rewrite_data_only_stripes() also updates log->next_checkpoints to the last sh->log_start. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 49aea4231084..b178a8fef266 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1682,8 +1682,7 @@ out: static struct stripe_head * r5c_recovery_alloc_stripe(struct r5conf *conf, - sector_t stripe_sect, - sector_t log_start) + sector_t stripe_sect) { struct stripe_head *sh; @@ -1692,7 +1691,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf, return NULL; /* no more stripe available */ r5l_recovery_reset_stripe(sh); - sh->log_start = log_start; return sh; } @@ -1862,7 +1860,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, stripe_sect); if (!sh) { - sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos); + sh = r5c_recovery_alloc_stripe(conf, stripe_sect); /* * cannot get stripe from raid5_get_active_stripe * try replay some stripes @@ -1871,7 +1869,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, r5c_recovery_replay_stripes( cached_stripe_list, ctx); sh = r5c_recovery_alloc_stripe( - conf, stripe_sect, ctx->pos); + conf, stripe_sect); } if (!sh) { pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", @@ -1879,8 +1877,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, conf->min_nr_stripes * 2); raid5_set_cache_size(mddev, conf->min_nr_stripes * 2); - sh = r5c_recovery_alloc_stripe( - conf, stripe_sect, ctx->pos); + sh = r5c_recovery_alloc_stripe(conf, + stripe_sect); } if (!sh) { pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", @@ -1894,7 +1892,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, if (!test_bit(STRIPE_R5C_CACHING, &sh->state) && test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) { r5l_recovery_replay_one_stripe(conf, sh, ctx); - sh->log_start = ctx->pos; list_move_tail(&sh->lru, cached_stripe_list); } r5l_recovery_load_data(log, sh, ctx, payload, @@ -1933,8 +1930,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log, set_bit(R5_UPTODATE, &dev->flags); } } - list_add_tail(&sh->r5c, &log->stripe_in_journal_list); - atomic_inc(&log->stripe_in_journal_count); } /* @@ -2070,6 +2065,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, struct stripe_head *sh, *next; struct mddev *mddev = log->rdev->mddev; struct page *page; + sector_t next_checkpoint = MaxSector; page = alloc_page(GFP_KERNEL); if (!page) { @@ -2078,6 +2074,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, return -ENOMEM; } + WARN_ON(list_empty(&ctx->cached_list)); + list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { struct r5l_meta_block *mb; int i; @@ -2123,12 +2121,15 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, REQ_OP_WRITE, REQ_FUA, false); sh->log_start = ctx->pos; + list_add_tail(&sh->r5c, &log->stripe_in_journal_list); + atomic_inc(&log->stripe_in_journal_count); ctx->pos = write_pos; ctx->seq += 1; - + next_checkpoint = sh->log_start; list_del_init(&sh->lru); raid5_release_stripe(sh); } + log->next_checkpoint = next_checkpoint; __free_page(page); return 0; } @@ -2139,7 +2140,6 @@ static int r5l_recovery_log(struct r5l_log *log) struct r5l_recovery_ctx ctx; int ret; sector_t pos; - struct stripe_head *sh; ctx.pos = log->last_checkpoint; ctx.seq = log->last_cp_seq; @@ -2164,9 +2164,6 @@ static int r5l_recovery_log(struct r5l_log *log) log->next_checkpoint = ctx.pos; r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); - } else { - sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru); - log->next_checkpoint = sh->log_start; } if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) From d2250f105f18a43fdab17421bd80b0ffc9fcc53f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 14 Dec 2016 15:38:02 -0800 Subject: [PATCH 069/258] md/r5cache: assign conf->log before r5l_load_log() r5l_load_log() calls functions that requires a proper conf->log, for example, r5c_is_writeback(). Therefore, we should set conf->log before calling r5l_load_log(). If r5l_load_log() fails, conf->log is set back to NULL. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index b178a8fef266..bff1b4a949e8 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2633,14 +2633,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) spin_lock_init(&log->stripe_in_journal_lock); atomic_set(&log->stripe_in_journal_count, 0); + rcu_assign_pointer(conf->log, log); + if (r5l_load_log(log)) goto error; - rcu_assign_pointer(conf->log, log); set_bit(MD_HAS_JOURNAL, &conf->mddev->flags); return 0; error: + rcu_assign_pointer(conf->log, NULL); md_unregister_thread(&log->reclaim_thread); reclaim_thread: mempool_destroy(log->meta_pool); From 99f17890f04cff0262de7393c60a2f6d9c9c7e71 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 23 Dec 2016 00:52:30 +0000 Subject: [PATCH 070/258] md/r5cache: fix spelling mistake on "recoverying" Trivial fix to spelling mistake "recoverying" to "recovering" in pr_dbg message. Signed-off-by: Colin Ian King Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index bff1b4a949e8..0e8ed2c327b0 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2170,7 +2170,7 @@ static int r5l_recovery_log(struct r5l_log *log) pr_debug("md/raid:%s: starting from clean shutdown\n", mdname(mddev)); else { - pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n", + pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n", mdname(mddev), ctx.data_only_stripes, ctx.data_parity_stripes); From 394ed8e4743b0cfc5496fe49059fbfc2bc8eae35 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 4 Jan 2017 16:10:19 -0800 Subject: [PATCH 071/258] md: cleanup mddev flag clear for takeover Commit 6995f0b (md: takeover should clear unrelated bits) clear unrelated bits, but it's quite fragile. To avoid error in the future, define a macro for unsupported mddev flags for each raid type and use it to clear unsupported mddev flags. This should be less error-prone. Suggested-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.h | 8 ++++++++ drivers/md/raid0.c | 12 ++++++++---- drivers/md/raid1.c | 8 ++++++-- drivers/md/raid5.c | 5 ++++- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/md/md.h b/drivers/md/md.h index e38936d05df1..2a514036a83d 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, int is_new); struct md_cluster_info; +/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */ enum mddev_flags { MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */ MD_CLOSING, /* If set, we are closing the array, do not open @@ -702,4 +703,11 @@ static inline int mddev_is_clustered(struct mddev *mddev) { return mddev->cluster_info && mddev->bitmap_info.nodes > 1; } + +/* clear unsupported mddev_flags */ +static inline void mddev_clear_unsupported_flags(struct mddev *mddev, + unsigned long unsupported_flags) +{ + mddev->flags &= ~unsupported_flags; +} #endif /* _MD_MD_H */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index a162fedeb51a..848365d474f3 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -26,6 +26,11 @@ #include "raid0.h" #include "raid5.h" +#define UNSUPPORTED_MDDEV_FLAGS \ + ((1L << MD_HAS_JOURNAL) | \ + (1L << MD_JOURNAL_CLEAN) | \ + (1L << MD_FAILFAST_SUPPORTED)) + static int raid0_congested(struct mddev *mddev, int bits) { struct r0conf *conf = mddev->private; @@ -539,8 +544,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev) mddev->delta_disks = -1; /* make sure it will be not marked as dirty */ mddev->recovery_cp = MaxSector; - clear_bit(MD_HAS_JOURNAL, &mddev->flags); - clear_bit(MD_JOURNAL_CLEAN, &mddev->flags); + mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); @@ -583,7 +587,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev) mddev->degraded = 0; /* make sure it will be not marked as dirty */ mddev->recovery_cp = MaxSector; - clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); + mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); return priv_conf; @@ -626,7 +630,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev) mddev->raid_disks = 1; /* make sure it will be not marked as dirty */ mddev->recovery_cp = MaxSector; - clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); + mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); return priv_conf; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 14422407e520..7b0f647bcccb 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -42,6 +42,10 @@ #include "raid1.h" #include "bitmap.h" +#define UNSUPPORTED_MDDEV_FLAGS \ + ((1L << MD_HAS_JOURNAL) | \ + (1L << MD_JOURNAL_CLEAN)) + /* * Number of guaranteed r1bios in case of extreme VM load: */ @@ -3257,8 +3261,8 @@ static void *raid1_takeover(struct mddev *mddev) if (!IS_ERR(conf)) { /* Array must appear to be quiesced */ conf->array_frozen = 1; - clear_bit(MD_HAS_JOURNAL, &mddev->flags); - clear_bit(MD_JOURNAL_CLEAN, &mddev->flags); + mddev_clear_unsupported_flags(mddev, + UNSUPPORTED_MDDEV_FLAGS); } return conf; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 06d7279bdd04..7b1da6e95a56 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -62,6 +62,8 @@ #include "raid0.h" #include "bitmap.h" +#define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED) + #define cpu_to_group(cpu) cpu_to_node(cpu) #define ANY_GROUP NUMA_NO_NODE @@ -7830,7 +7832,8 @@ static void *raid5_takeover_raid1(struct mddev *mddev) ret = setup_conf(mddev); if (!IS_ERR_VALUE(ret)) - clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); + mddev_clear_unsupported_flags(mddev, + UNSUPPORTED_MDDEV_FLAGS); return ret; } From 1c3415a06b1016a596bfe59e0cfee56c773aa958 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Jan 2017 14:14:54 -0800 Subject: [PATCH 072/258] Input: elants_i2c - avoid divide by 0 errors on bad touchscreen data The following crash may be seen if bad data is received from the touchscreen. [ 2189.425150] elants_i2c i2c-ELAN0001:00: unknown packet ff ff ff ff [ 2189.430738] divide error: 0000 [#1] PREEMPT SMP [ 2189.434679] gsmi: Log Shutdown Reason 0x03 [ 2189.434689] Modules linked in: ip6t_REJECT nf_reject_ipv6 rfcomm evdi uinput uvcvideo cmac videobuf2_vmalloc videobuf2_memops snd_hda_codec_hdmi i2c_dev videobuf2_core snd_soc_sst_cht_bsw_rt5645 snd_hda_intel snd_intel_sst_acpi btusb btrtl btbcm btintel bluetooth snd_soc_sst_acpi snd_hda_codec snd_intel_sst_core snd_hwdep snd_soc_sst_mfld_platform snd_hda_core snd_soc_rt5645 memconsole_x86_legacy memconsole zram snd_soc_rl6231 fuse ip6table_filter iwlmvm iwlwifi iwl7000_mac80211 cfg80211 iio_trig_sysfs joydev cros_ec_sensors cros_ec_sensors_core industrialio_triggered_buffer kfifo_buf industrialio snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device ppp_async ppp_generic slhc tun [ 2189.434866] CPU: 0 PID: 106 Comm: irq/184-ELAN000 Tainted: G W 3.18.0-13101-g57e8190 #1 [ 2189.434883] Hardware name: GOOGLE Ultima, BIOS Google_Ultima.7287.131.43 07/20/2016 [ 2189.434898] task: ffff88017a0b6d80 ti: ffff88017a2bc000 task.ti: ffff88017a2bc000 [ 2189.434913] RIP: 0010:[] [] elants_i2c_irq+0x190/0x200 [ 2189.434937] RSP: 0018:ffff88017a2bfd98 EFLAGS: 00010293 [ 2189.434948] RAX: 0000000000000000 RBX: ffff88017a967828 RCX: ffff88017a9678e8 [ 2189.434962] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000000 [ 2189.434975] RBP: ffff88017a2bfdd8 R08: 00000000000003e8 R09: 0000000000000000 [ 2189.434989] R10: 0000000000000000 R11: 000000000044a2bd R12: ffff88017a991800 [ 2189.435001] R13: ffffffffbe8a2a53 R14: ffff88017a0b6d80 R15: ffff88017a0b6d80 [ 2189.435011] FS: 0000000000000000(0000) GS:ffff88017fc00000(0000) knlGS:0000000000000000 [ 2189.435022] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 2189.435030] CR2: 00007f678d94b000 CR3: 000000003f41a000 CR4: 00000000001007f0 [ 2189.435039] Stack: [ 2189.435044] ffff88017a2bfda8 ffff88017a9678e8 646464647a2bfdd8 0000000006e09574 [ 2189.435060] 0000000000000000 ffff88017a088b80 ffff88017a921000 ffffffffbe8a2a53 [ 2189.435074] ffff88017a2bfe08 ffffffffbe8a2a73 ffff88017a0b6d80 0000000006e09574 [ 2189.435089] Call Trace: [ 2189.435101] [] ? irq_thread_dtor+0xa9/0xa9 [ 2189.435112] [] irq_thread_fn+0x20/0x40 [ 2189.435123] [] irq_thread+0x14e/0x222 [ 2189.435135] [] ? __schedule+0x3b3/0x57a [ 2189.435145] [] ? wake_threads_waitq+0x2d/0x2d [ 2189.435156] [] ? irq_thread_fn+0x40/0x40 [ 2189.435168] [] kthread+0x10e/0x116 [ 2189.435178] [] ? __kthread_parkme+0x67/0x67 [ 2189.435189] [] ret_from_fork+0x7c/0xb0 [ 2189.435199] [] ? __kthread_parkme+0x67/0x67 [ 2189.435208] Code: ff ff eb 73 0f b6 bb c1 00 00 00 83 ff 03 7e 13 49 8d 7c 24 20 ba 04 00 00 00 48 c7 c6 8a cd 21 bf eb 4d 0f b6 83 c2 00 00 00 99 ff 83 f8 37 75 15 48 6b f7 37 4c 8d a3 c4 00 00 00 4c 8d ac [ 2189.435312] RIP [] elants_i2c_irq+0x190/0x200 [ 2189.435323] RSP [ 2189.435350] ---[ end trace f4945345a75d96dd ]--- [ 2189.443841] Kernel panic - not syncing: Fatal exception [ 2189.444307] Kernel Offset: 0x3d800000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 2189.444519] gsmi: Log Shutdown Reason 0x02 The problem was seen with a 3.18 based kernel, but there is no reason to believe that the upstream code is safe. Fixes: 66aee90088da2 ("Input: add support for Elan eKTH I2C touchscreens") Signed-off-by: Guenter Roeck Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 02aec284deca..3e6003d32e56 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -914,9 +914,9 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev) case QUEUE_HEADER_NORMAL: report_count = ts->buf[FW_HDR_COUNT]; - if (report_count > 3) { + if (report_count == 0 || report_count > 3) { dev_err(&client->dev, - "too large report count: %*ph\n", + "bad report count: %*ph\n", HEADER_SIZE, ts->buf); break; } From 9698b6f473555a722bf81a3371998427d5d27bde Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 14 Dec 2016 13:20:41 -0800 Subject: [PATCH 073/258] scsi: fnic: Avoid sending reset to firmware when another reset is in progress This fix is to avoid calling fnic_fw_reset_handler through fnic_host_reset when a finc reset is alreay in progress. Signed-off-by: Satish Kharat Signed-off-by: Sesidhar Baddela Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic.h | 1 + drivers/scsi/fnic/fnic_scsi.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 9ddc9200e0a4..9e4b7709043e 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -248,6 +248,7 @@ struct fnic { struct completion *remove_wait; /* device remove thread blocks */ atomic_t in_flight; /* io counter */ + bool internal_reset_inprogress; u32 _reserved; /* fill hole */ unsigned long state_flags; /* protected by host lock */ enum fnic_state state; diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 2544a37ece0a..adb3d5871e74 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2581,6 +2581,19 @@ int fnic_host_reset(struct scsi_cmnd *sc) unsigned long wait_host_tmo; struct Scsi_Host *shost = sc->device->host; struct fc_lport *lp = shost_priv(shost); + struct fnic *fnic = lport_priv(lp); + unsigned long flags; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->internal_reset_inprogress == 0) { + fnic->internal_reset_inprogress = 1; + } else { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "host reset in progress skipping another host reset\n"); + return SUCCESS; + } + spin_unlock_irqrestore(&fnic->fnic_lock, flags); /* * If fnic_reset is successful, wait for fabric login to complete @@ -2601,6 +2614,9 @@ int fnic_host_reset(struct scsi_cmnd *sc) } } + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->internal_reset_inprogress = 0; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); return ret; } From 0371adcdaca92912baaa3256ed13e058a016e62d Mon Sep 17 00:00:00 2001 From: Burak Ok Date: Wed, 21 Dec 2016 14:45:53 +0100 Subject: [PATCH 074/258] scsi: snic: Return error code on memory allocation failure If a call to mempool_create_slab_pool() in snic_probe() returns NULL, return -ENOMEM to indicate failure. mempool_creat_slab_pool() only fails if it cannot allocate memory. https://bugzilla.kernel.org/show_bug.cgi?id=189061 Reported-by: bianpan2010@ruc.edu.cn Signed-off-by: Burak Ok Signed-off-by: Andreas Schaertl Acked-by: Narsimhulu Musini Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/snic/snic_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c index 396b32dca074..7cf70aaec0ba 100644 --- a/drivers/scsi/snic/snic_main.c +++ b/drivers/scsi/snic/snic_main.c @@ -591,6 +591,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "dflt sgl pool creation failed\n"); + ret = -ENOMEM; goto err_free_res; } @@ -601,6 +602,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "max sgl pool creation failed\n"); + ret = -ENOMEM; goto err_free_dflt_sgl_pool; } @@ -611,6 +613,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pool) { SNIC_HOST_ERR(shost, "snic tmreq info pool creation failed.\n"); + ret = -ENOMEM; goto err_free_max_sgl_pool; } From 2d1148f0f45079d25a0fa0d67e4fdb2a656d12fb Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 23 Dec 2016 20:40:19 -0800 Subject: [PATCH 075/258] scsi: bfa: Increase requested firmware version to 3.2.5.1 bna & bfa firmware version 3.2.5.1 was submitted to linux-firmware on Feb 17 19:10:20 2015 -0500 in 0ab54ff1dc ("linux-firmware: Add QLogic BR Series Adapter Firmware"). bna was updated to use the newer firmware on Feb 19 16:02:32 2015 -0500 in 3f307c3d70 ("bna: Update the Driver and Firmware Version") bfa was not updated. I presume this was an oversight but it broke support for bfa+bna cards such as the following 04:00.0 Fibre Channel [0c04]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) 04:00.1 Fibre Channel [0c04]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) 04:00.2 Ethernet controller [0200]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) 04:00.3 Ethernet controller [0200]: Brocade Communications Systems, Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01) Currently, if the bfa module is loaded first, bna fails to probe the respective devices with [ 215.026787] bna: QLogic BR-series 10G Ethernet driver - version: 3.2.25.1 [ 215.043707] bna 0000:04:00.2: bar0 mapped to ffffc90001fc0000, len 262144 [ 215.060656] bna 0000:04:00.2: initialization failed err=1 [ 215.073893] bna 0000:04:00.3: bar0 mapped to ffffc90002040000, len 262144 [ 215.090644] bna 0000:04:00.3: initialization failed err=1 Whereas if bna is loaded first, bfa fails with [ 249.592109] QLogic BR-series BFA FC/FCOE SCSI driver - version: 3.2.25.0 [ 249.610738] bfa 0000:04:00.0: Running firmware version is incompatible with the driver version [ 249.833513] bfa 0000:04:00.0: bfa init failed [ 249.833919] scsi host6: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.0 driver: 3.2.25.0 [ 249.841446] bfa 0000:04:00.1: Running firmware version is incompatible with the driver version [ 250.045449] bfa 0000:04:00.1: bfa init failed [ 250.045962] scsi host7: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.1 driver: 3.2.25.0 Increase bfa's requested firmware version. Also increase the driver version. I only tested that all of the devices probe without error. Reported-by: Tim Ehlers Signed-off-by: Benjamin Poirier Acked-by: Rasesh Mody Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad.c | 6 +++--- drivers/scsi/bfa/bfad_drv.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 9d253cb83ee7..e70410beb83a 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -64,9 +64,9 @@ int max_rport_logins = BFA_FCS_MAX_RPORT_LOGINS; u32 bfi_image_cb_size, bfi_image_ct_size, bfi_image_ct2_size; u32 *bfi_image_cb, *bfi_image_ct, *bfi_image_ct2; -#define BFAD_FW_FILE_CB "cbfw-3.2.3.0.bin" -#define BFAD_FW_FILE_CT "ctfw-3.2.3.0.bin" -#define BFAD_FW_FILE_CT2 "ct2fw-3.2.3.0.bin" +#define BFAD_FW_FILE_CB "cbfw-3.2.5.1.bin" +#define BFAD_FW_FILE_CT "ctfw-3.2.5.1.bin" +#define BFAD_FW_FILE_CT2 "ct2fw-3.2.5.1.bin" static u32 *bfad_load_fwimg(struct pci_dev *pdev); static void bfad_free_fwimg(void); diff --git a/drivers/scsi/bfa/bfad_drv.h b/drivers/scsi/bfa/bfad_drv.h index f9e862093a25..cfcfff48e8e1 100644 --- a/drivers/scsi/bfa/bfad_drv.h +++ b/drivers/scsi/bfa/bfad_drv.h @@ -58,7 +58,7 @@ #ifdef BFA_DRIVER_VERSION #define BFAD_DRIVER_VERSION BFA_DRIVER_VERSION #else -#define BFAD_DRIVER_VERSION "3.2.25.0" +#define BFAD_DRIVER_VERSION "3.2.25.1" #endif #define BFAD_PROTO_NAME FCPI_NAME From a33d331761bc5dd330499ca5ceceb67f0640a8e6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 5 Jan 2017 10:26:38 +0100 Subject: [PATCH 076/258] x86/CPU/AMD: Fix Bulldozer topology The following commit: 8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id") ... broke the initial strategy for Bulldozer-based cores' topology, where we consider each thread of a compute unit a standalone core and not a HT or SMT thread. Revert to the firmware-supplied core_id numbering and do not make them thread siblings as we don't consider them for such even if they technically are, more or less. Reported-and-tested-by: Brice Goglin Tested-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: # v4.6+ Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id") Link: http://lkml.kernel.org/r/20170105092638.5247-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 71cae73a5076..1d3167269a67 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -309,15 +309,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 eax, ebx, ecx, edx; - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 7; - - /* get compute unit information */ - smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->x86_max_cores /= smp_num_siblings; - c->cpu_core_id = ebx & 0xff; + node_id = cpuid_ecx(0x8000001e) & 7; /* * We may have multiple LLCs if L3 caches exist, so check if we From 1ebb71143758f45dc0fa76e2f48429e13b16d110 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Jan 2017 15:33:36 +0100 Subject: [PATCH 077/258] HID: hid-cypress: validate length of report Make sure we have enough of a report structure to validate before looking at it. Reported-by: Benoit Camredon Tested-by: Benoit Camredon Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jiri Kosina --- drivers/hid/hid-cypress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 1b764d1745f3..1689568b597d 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -39,6 +39,9 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) return rdesc; + if (*rsize < 4) + return rdesc; + for (i = 0; i < *rsize - 4; i++) if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) { rdesc[i] = 0x19; From bc65a326c579e93a5c2120a65ede72f11369ee5a Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Mon, 2 Jan 2017 09:50:05 +0530 Subject: [PATCH 078/258] ASoC: Intel: Skylake: Release FW ctx in cleanup Saved firmware ctx was not never released, so release Firmware ctx in cleanup routine. Signed-off-by: Jeeja KP Acked-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-sst.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c index 8fc3178bc79c..b30bd384c8d3 100644 --- a/sound/soc/intel/skylake/skl-sst.c +++ b/sound/soc/intel/skylake/skl-sst.c @@ -515,6 +515,9 @@ EXPORT_SYMBOL_GPL(skl_sst_init_fw); void skl_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx) { + + if (ctx->dsp->fw) + release_firmware(ctx->dsp->fw); skl_clear_module_table(ctx->dsp); skl_freeup_uuid_list(ctx); skl_ipc_free(&ctx->ipc); From 9f169b9f52a4afccdab7a7d2311b0c53a78a1e6b Mon Sep 17 00:00:00 2001 From: Patrick Lai Date: Sat, 31 Dec 2016 22:44:39 -0800 Subject: [PATCH 079/258] ASoC: dpcm: Avoid putting stream state to STOP when FE stream is paused When multiple front-ends are using the same back-end, putting state of a front-end to STOP state upon receiving pause command will result in backend stream getting released by DPCM framework unintentionally. In order to avoid backend to be released when another active front-end stream is present, put the stream state to PAUSED state instead of STOP state. Signed-off-by: Patrick Lai Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index e7a1eaa2772f..6aba14009c92 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2184,9 +2184,11 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP; break; + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED; + break; } out: From 20b1e22d01a4b0b11d3a1066e9feb04be38607ec Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 5 Jan 2017 13:51:29 +0100 Subject: [PATCH 080/258] x86/efi: Don't allocate memmap through memblock after mm_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the following commit: 4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data") ... efi_bgrt_init() calls into the memblock allocator through efi_mem_reserve() => efi_arch_mem_reserve() *after* mm_init() has been called. Indeed, KASAN reports a bad read access later on in efi_free_boot_services(): BUG: KASAN: use-after-free in efi_free_boot_services+0xae/0x24c at addr ffff88022de12740 Read of size 4 by task swapper/0/0 page:ffffea0008b78480 count:0 mapcount:-127 mapping: (null) index:0x1 flags: 0x5fff8000000000() [...] Call Trace: dump_stack+0x68/0x9f kasan_report_error+0x4c8/0x500 kasan_report+0x58/0x60 __asan_load4+0x61/0x80 efi_free_boot_services+0xae/0x24c start_kernel+0x527/0x562 x86_64_start_reservations+0x24/0x26 x86_64_start_kernel+0x157/0x17a start_cpu+0x5/0x14 The instruction at the given address is the first read from the memmap's memory, i.e. the read of md->type in efi_free_boot_services(). Note that the writes earlier in efi_arch_mem_reserve() don't splat because they're done through early_memremap()ed addresses. So, after memblock is gone, allocations should be done through the "normal" page allocator. Introduce a helper, efi_memmap_alloc() for this. Use it from efi_arch_mem_reserve(), efi_free_boot_services() and, for the sake of consistency, from efi_fake_memmap() as well. Note that for the latter, the memmap allocations cease to be page aligned. This isn't needed though. Tested-by: Dan Williams Signed-off-by: Nicolai Stange Reviewed-by: Ard Biesheuvel Cc: # v4.9 Cc: Dave Young Cc: Linus Torvalds Cc: Matt Fleming Cc: Mika Penttilä Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data") Link: http://lkml.kernel.org/r/20170105125130.2815-1-nicstange@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 4 ++-- drivers/firmware/efi/fake_mem.c | 3 +-- drivers/firmware/efi/memmap.c | 38 +++++++++++++++++++++++++++++++++ include/linux/efi.h | 1 + 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 10aca63a50d7..30031d5293c4 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -214,7 +214,7 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Could not allocate boot services memmap\n"); return; @@ -355,7 +355,7 @@ void __init efi_free_boot_services(void) } new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Failed to allocate new EFI memmap\n"); return; diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 520a40e5e0e4..6c7d60c239b5 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -71,8 +71,7 @@ void __init efi_fake_memmap(void) } /* allocate memory for new EFI memmap */ - new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map, - PAGE_SIZE); + new_memmap_phy = efi_memmap_alloc(new_nr_map); if (!new_memmap_phy) return; diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index f03ddecd232b..78686443cb37 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -9,6 +9,44 @@ #include #include #include +#include +#include + +static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) +{ + return memblock_alloc(size, 0); +} + +static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) +{ + unsigned int order = get_order(size); + struct page *p = alloc_pages(GFP_KERNEL, order); + + if (!p) + return 0; + + return PFN_PHYS(page_to_pfn(p)); +} + +/** + * efi_memmap_alloc - Allocate memory for the EFI memory map + * @num_entries: Number of entries in the allocated map. + * + * Depending on whether mm_init() has already been invoked or not, + * either memblock or "normal" page allocation is used. + * + * Returns the physical address of the allocated memory map on + * success, zero on failure. + */ +phys_addr_t __init efi_memmap_alloc(unsigned int num_entries) +{ + unsigned long size = num_entries * efi.memmap.desc_size; + + if (slab_is_available()) + return __efi_memmap_alloc_late(size); + + return __efi_memmap_alloc_early(size); +} /** * __efi_memmap_init - Common code for mapping the EFI memory map diff --git a/include/linux/efi.h b/include/linux/efi.h index a07a476178cd..0c5420208c40 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -950,6 +950,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); +extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); From eeb0d56fab4cd7848cf2be6704fa48900dbc1381 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Dec 2016 16:47:43 +0100 Subject: [PATCH 081/258] mac80211: implement multicast forwarding on fast-RX path In AP (or VLAN) mode, when unicast 802.11 packets are received, they might actually be multicast after conversion. In this case the fast-RX path didn't handle them properly to send them back to the wireless medium. Implement that by copying the SKB and sending it back out. The possible alternative would be to just punt the packet back to the regular (slow) RX path, but since we have almost all of the required code here already it's not so complicated to add here. Punting it back would also mean acquiring the spinlock, which would be bad for the stated purpose of the fast-RX path, to enable well-performing parallel RX. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3e289a64ed43..c037c5bb6167 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3942,21 +3942,31 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u64_stats_update_end(&stats->syncp); if (fast_rx->internal_forward) { - struct sta_info *dsta = sta_info_get(rx->sdata, skb->data); + struct sk_buff *xmit_skb = NULL; + bool multicast = is_multicast_ether_addr(skb->data); - if (dsta) { + if (multicast) { + xmit_skb = skb_copy(skb, GFP_ATOMIC); + } else if (sta_info_get(rx->sdata, skb->data)) { + xmit_skb = skb; + skb = NULL; + } + + if (xmit_skb) { /* * Send to wireless media and increase priority by 256 * to keep the received priority instead of * reclassifying the frame (see cfg80211_classify8021d). */ - skb->priority += 256; - skb->protocol = htons(ETH_P_802_3); - skb_reset_network_header(skb); - skb_reset_mac_header(skb); - dev_queue_xmit(skb); - return true; + xmit_skb->priority += 256; + xmit_skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(xmit_skb); + skb_reset_mac_header(xmit_skb); + dev_queue_xmit(xmit_skb); } + + if (!skb) + return true; } /* deliver to local stack */ From ac0c7cf8be00f269f82964cf7b144ca3edc5dbc4 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 6 Jan 2017 14:12:51 +0100 Subject: [PATCH 082/258] btrfs: fix crash when tracepoint arguments are freed by wq callbacks Enabling btrfs tracepoints leads to instant crash, as reported. The wq callbacks could free the memory and the tracepoints started to dereference the members to get to fs_info. The proposed fix https://marc.info/?l=linux-btrfs&m=148172436722606&w=2 removed the tracepoints but we could preserve them by passing only the required data in a safe way. Fixes: bc074524e123 ("btrfs: prefix fsid to all trace events") CC: stable@vger.kernel.org # 4.8+ Reported-by: Sebastian Andrzej Siewior Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/async-thread.c | 15 +++++++++++---- include/trace/events/btrfs.h | 22 +++++++++++++--------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 63d197724519..ff0b0be92d61 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -273,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) unsigned long flags; while (1) { + void *wtag; + spin_lock_irqsave(lock, flags); if (list_empty(list)) break; @@ -299,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); /* - * we don't want to call the ordered free functions - * with the lock held though + * We don't want to call the ordered free functions with the + * lock held though. Save the work as tag for the trace event, + * because the callback could free the structure. */ + wtag = work; work->ordered_free(work); - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } spin_unlock_irqrestore(lock, flags); } @@ -311,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) static void normal_work_helper(struct btrfs_work *work) { struct __btrfs_workqueue *wq; + void *wtag; int need_order = 0; /* @@ -324,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work) if (work->ordered_func) need_order = 1; wq = work->wq; + /* Safe for tracepoints in case work gets freed by the callback */ + wtag = work; trace_btrfs_work_sched(work); thresh_exec_hook(wq); @@ -333,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work) run_ordered_work(wq); } if (!need_order) - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index c14bed4ab097..b09225c77676 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1157,22 +1157,26 @@ DECLARE_EVENT_CLASS(btrfs__work, __entry->func, __entry->ordered_func, __entry->ordered_free) ); -/* For situiations that the work is freed */ +/* + * For situiations when the work is freed, we pass fs_info and a tag that that + * matches address of the work structure so it can be paired with the + * scheduling event. + */ DECLARE_EVENT_CLASS(btrfs__work__done, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), - TP_ARGS(work), + TP_ARGS(fs_info, wtag), TP_STRUCT__entry_btrfs( - __field( void *, work ) + __field( void *, wtag ) ), - TP_fast_assign_btrfs(btrfs_work_owner(work), - __entry->work = work; + TP_fast_assign_btrfs(fs_info, + __entry->wtag = wtag; ), - TP_printk_btrfs("work->%p", __entry->work) + TP_printk_btrfs("work->%p", __entry->wtag) ); DEFINE_EVENT(btrfs__work, btrfs_work_queued, @@ -1191,9 +1195,9 @@ DEFINE_EVENT(btrfs__work, btrfs_work_sched, DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), - TP_ARGS(work) + TP_ARGS(fs_info, wtag) ); DEFINE_EVENT(btrfs__work, btrfs_ordered_sched, From 92a1bf76a89ad338f00eb9a2c7689a3907fbcaad Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 17 Nov 2016 15:00:50 -0800 Subject: [PATCH 083/258] Btrfs: add 'inode' for extent map tracepoint 'inode' is an important field for btrfs_get_extent, lets trace it. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- include/trace/events/btrfs.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a713d9d324b0..fab189c67eff 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7059,7 +7059,7 @@ insert: write_unlock(&em_tree->lock); out: - trace_btrfs_get_extent(root, em); + trace_btrfs_get_extent(root, inode, em); btrfs_free_path(path); if (trans) { diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index b09225c77676..3048f5205363 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -184,14 +184,16 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, TRACE_EVENT_CONDITION(btrfs_get_extent, - TP_PROTO(struct btrfs_root *root, struct extent_map *map), + TP_PROTO(struct btrfs_root *root, struct inode *inode, + struct extent_map *map), - TP_ARGS(root, map), + TP_ARGS(root, inode, map), TP_CONDITION(map), TP_STRUCT__entry_btrfs( __field( u64, root_objectid ) + __field( u64, ino ) __field( u64, start ) __field( u64, len ) __field( u64, orig_start ) @@ -204,7 +206,8 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, TP_fast_assign_btrfs(root->fs_info, __entry->root_objectid = root->root_key.objectid; - __entry->start = map->start; + __entry->ino = btrfs_ino(inode); + __entry->start = map->start; __entry->len = map->len; __entry->orig_start = map->orig_start; __entry->block_start = map->block_start; @@ -214,11 +217,12 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->compress_type = map->compress_type; ), - TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu, " + TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, " "orig_start = %llu, block_start = %llu(%s), " "block_len = %llu, flags = %s, refs = %u, " "compress_type = %u", show_root_type(__entry->root_objectid), + (unsigned long long)__entry->ino, (unsigned long long)__entry->start, (unsigned long long)__entry->len, (unsigned long long)__entry->orig_start, From 7856654842bdbebc0fbcbf51573da5d70a787aba Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 30 Nov 2016 16:10:10 -0800 Subject: [PATCH 084/258] Btrfs: add truncated_len for ordered extent tracepoints This can help us monitor truncated ordered extents. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 3048f5205363..2026a89786b0 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -263,6 +263,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, __field( int, compress_type ) __field( int, refs ) __field( u64, root_objectid ) + __field( u64, truncated_len ) ), TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), @@ -277,10 +278,12 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, __entry->refs = atomic_read(&ordered->refs); __entry->root_objectid = BTRFS_I(inode)->root->root_key.objectid; + __entry->truncated_len = ordered->truncated_len; ), TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, " "start = %llu, len = %llu, disk_len = %llu, " + "truncated_len = %llu, " "bytes_left = %llu, flags = %s, compress_type = %d, " "refs = %d", show_root_type(__entry->root_objectid), @@ -289,6 +292,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, (unsigned long long)__entry->start, (unsigned long long)__entry->len, (unsigned long long)__entry->disk_len, + (unsigned long long)__entry->truncated_len, (unsigned long long)__entry->bytes_left, show_ordered_flags(__entry->flags), __entry->compress_type, __entry->refs) From 562a7a07bf61e2949f7cbdb6ac7537ad9e2794d1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 6 Jan 2017 15:51:36 +0100 Subject: [PATCH 085/258] btrfs: make tracepoint format strings more compact We've recently added the fsid to trace events, this makes the line quite long. To reduce the it again, remove extra spaces around = and remove ",". Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 112 +++++++++++++++++------------------ 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 2026a89786b0..88d18a8ceb59 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -130,8 +130,8 @@ DECLARE_EVENT_CLASS(btrfs__inode, BTRFS_I(inode)->root->root_key.objectid; ), - TP_printk_btrfs("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, " - "disk_i_size = %llu, last_trans = %llu, logged_trans = %llu", + TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%lu blocks=%llu " + "disk_i_size=%llu last_trans=%llu logged_trans=%llu", show_root_type(__entry->root_objectid), (unsigned long long)__entry->generation, (unsigned long)__entry->ino, @@ -217,10 +217,10 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->compress_type = map->compress_type; ), - TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, " - "orig_start = %llu, block_start = %llu(%s), " - "block_len = %llu, flags = %s, refs = %u, " - "compress_type = %u", + TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu " + "orig_start=%llu block_start=%llu(%s) " + "block_len=%llu flags=%s refs=%u " + "compress_type=%u", show_root_type(__entry->root_objectid), (unsigned long long)__entry->ino, (unsigned long long)__entry->start, @@ -281,11 +281,11 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, __entry->truncated_len = ordered->truncated_len; ), - TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, " - "start = %llu, len = %llu, disk_len = %llu, " - "truncated_len = %llu, " - "bytes_left = %llu, flags = %s, compress_type = %d, " - "refs = %d", + TP_printk_btrfs("root=%llu(%s) ino=%llu file_offset=%llu " + "start=%llu len=%llu disk_len=%llu " + "truncated_len=%llu " + "bytes_left=%llu flags=%s compress_type=%d " + "refs=%d", show_root_type(__entry->root_objectid), (unsigned long long)__entry->ino, (unsigned long long)__entry->file_offset, @@ -362,10 +362,10 @@ DECLARE_EVENT_CLASS(btrfs__writepage, BTRFS_I(inode)->root->root_key.objectid; ), - TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, " - "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, " - "range_end = %llu, for_kupdate = %d, " - "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu", + TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu " + "nr_to_write=%ld pages_skipped=%ld range_start=%llu " + "range_end=%llu for_kupdate=%d " + "for_reclaim=%d range_cyclic=%d writeback_index=%lu", show_root_type(__entry->root_objectid), (unsigned long)__entry->ino, __entry->index, __entry->nr_to_write, __entry->pages_skipped, @@ -408,8 +408,8 @@ TRACE_EVENT(btrfs_writepage_end_io_hook, BTRFS_I(page->mapping->host)->root->root_key.objectid; ), - TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, " - "end = %llu, uptodate = %d", + TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu start=%llu " + "end=%llu uptodate=%d", show_root_type(__entry->root_objectid), (unsigned long)__entry->ino, (unsigned long)__entry->index, (unsigned long long)__entry->start, @@ -441,7 +441,7 @@ TRACE_EVENT(btrfs_sync_file, BTRFS_I(inode)->root->root_key.objectid; ), - TP_printk_btrfs("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d", + TP_printk_btrfs("root=%llu(%s) ino=%ld parent=%ld datasync=%d", show_root_type(__entry->root_objectid), (unsigned long)__entry->ino, (unsigned long)__entry->parent, __entry->datasync) @@ -492,9 +492,9 @@ TRACE_EVENT(btrfs_add_block_group, __entry->create = create; ), - TP_printk("%pU: block_group offset = %llu, size = %llu, " - "flags = %llu(%s), bytes_used = %llu, bytes_super = %llu, " - "create = %d", __entry->fsid, + TP_printk("%pU: block_group offset=%llu size=%llu " + "flags=%llu(%s) bytes_used=%llu bytes_super=%llu " + "create=%d", __entry->fsid, (unsigned long long)__entry->offset, (unsigned long long)__entry->size, (unsigned long long)__entry->flags, @@ -543,9 +543,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref, __entry->seq = ref->seq; ), - TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, " - "parent = %llu(%s), ref_root = %llu(%s), level = %d, " - "type = %s, seq = %llu", + TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s " + "parent=%llu(%s) ref_root=%llu(%s) level=%d " + "type=%s seq=%llu", (unsigned long long)__entry->bytenr, (unsigned long long)__entry->num_bytes, show_ref_action(__entry->action), @@ -608,9 +608,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_data_ref, __entry->seq = ref->seq; ), - TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, " - "parent = %llu(%s), ref_root = %llu(%s), owner = %llu, " - "offset = %llu, type = %s, seq = %llu", + TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s " + "parent=%llu(%s) ref_root=%llu(%s) owner=%llu " + "offset=%llu type=%s seq=%llu", (unsigned long long)__entry->bytenr, (unsigned long long)__entry->num_bytes, show_ref_action(__entry->action), @@ -665,7 +665,7 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, __entry->is_data = head_ref->is_data; ), - TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d", + TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s is_data=%d", (unsigned long long)__entry->bytenr, (unsigned long long)__entry->num_bytes, show_ref_action(__entry->action), @@ -729,8 +729,8 @@ DECLARE_EVENT_CLASS(btrfs__chunk, __entry->root_objectid = fs_info->chunk_root->root_key.objectid; ), - TP_printk_btrfs("root = %llu(%s), offset = %llu, size = %llu, " - "num_stripes = %d, sub_stripes = %d, type = %s", + TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu " + "num_stripes=%d sub_stripes=%d type=%s", show_root_type(__entry->root_objectid), (unsigned long long)__entry->offset, (unsigned long long)__entry->size, @@ -779,8 +779,8 @@ TRACE_EVENT(btrfs_cow_block, __entry->cow_level = btrfs_header_level(cow); ), - TP_printk_btrfs("root = %llu(%s), refs = %d, orig_buf = %llu " - "(orig_level = %d), cow_buf = %llu (cow_level = %d)", + TP_printk_btrfs("root=%llu(%s) refs=%d orig_buf=%llu " + "(orig_level=%d) cow_buf=%llu (cow_level=%d)", show_root_type(__entry->root_objectid), __entry->refs, (unsigned long long)__entry->buf_start, @@ -844,7 +844,7 @@ TRACE_EVENT(btrfs_trigger_flush, __assign_str(reason, reason) ), - TP_printk("%pU: %s: flush = %d(%s), flags = %llu(%s), bytes = %llu", + TP_printk("%pU: %s: flush=%d(%s) flags=%llu(%s) bytes=%llu", __entry->fsid, __get_str(reason), __entry->flush, show_flush_action(__entry->flush), (unsigned long long)__entry->flags, @@ -887,8 +887,8 @@ TRACE_EVENT(btrfs_flush_space, __entry->ret = ret; ), - TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, " - "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state, + TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu " + "orig_bytes=%llu ret=%d", __entry->fsid, __entry->state, show_flush_state(__entry->state), (unsigned long long)__entry->flags, __print_flags((unsigned long)__entry->flags, "|", @@ -913,7 +913,7 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent, __entry->len = len; ), - TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu", + TP_printk_btrfs("root=%llu(%s) start=%llu len=%llu", show_root_type(BTRFS_EXTENT_TREE_OBJECTID), (unsigned long long)__entry->start, (unsigned long long)__entry->len) @@ -952,7 +952,7 @@ TRACE_EVENT(find_free_extent, __entry->data = data; ), - TP_printk_btrfs("root = %Lu(%s), len = %Lu, empty_size = %Lu, flags = %Lu(%s)", + TP_printk_btrfs("root=%Lu(%s) len=%Lu empty_size=%Lu flags=%Lu(%s)", show_root_type(BTRFS_EXTENT_TREE_OBJECTID), __entry->num_bytes, __entry->empty_size, __entry->data, __print_flags((unsigned long)__entry->data, "|", @@ -981,8 +981,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent, __entry->len = len; ), - TP_printk_btrfs("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), " - "start = %Lu, len = %Lu", + TP_printk_btrfs("root=%Lu(%s) block_group=%Lu flags=%Lu(%s) " + "start=%Lu len=%Lu", show_root_type(BTRFS_EXTENT_TREE_OBJECTID), __entry->bg_objectid, __entry->flags, __print_flags((unsigned long)__entry->flags, @@ -1033,8 +1033,8 @@ TRACE_EVENT(btrfs_find_cluster, __entry->min_bytes = min_bytes; ), - TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu," - " empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid, + TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) start=%Lu len=%Lu " + "empty_size=%Lu min_bytes=%Lu", __entry->bg_objectid, __entry->flags, __print_flags((unsigned long)__entry->flags, "|", BTRFS_GROUP_FLAGS), __entry->start, @@ -1055,7 +1055,7 @@ TRACE_EVENT(btrfs_failed_cluster_setup, __entry->bg_objectid = block_group->key.objectid; ), - TP_printk_btrfs("block_group = %Lu", __entry->bg_objectid) + TP_printk_btrfs("block_group=%Lu", __entry->bg_objectid) ); TRACE_EVENT(btrfs_setup_cluster, @@ -1083,8 +1083,8 @@ TRACE_EVENT(btrfs_setup_cluster, __entry->bitmap = bitmap; ), - TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, " - "size = %Lu, max_size = %Lu, bitmap = %d", + TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) window_start=%Lu " + "size=%Lu max_size=%Lu bitmap=%d", __entry->bg_objectid, __entry->flags, __print_flags((unsigned long)__entry->flags, "|", @@ -1111,7 +1111,7 @@ TRACE_EVENT(alloc_extent_state, __entry->ip = IP ), - TP_printk("state=%p; mask = %s; caller = %pS", __entry->state, + TP_printk("state=%p mask=%s caller=%pS", __entry->state, show_gfp_flags(__entry->mask), (void *)__entry->ip) ); @@ -1131,7 +1131,7 @@ TRACE_EVENT(free_extent_state, __entry->ip = IP ), - TP_printk(" state=%p; caller = %pS", __entry->state, + TP_printk("state=%p caller=%pS", __entry->state, (void *)__entry->ip) ); @@ -1159,8 +1159,8 @@ DECLARE_EVENT_CLASS(btrfs__work, __entry->normal_work = &work->normal_work; ), - TP_printk_btrfs("work=%p (normal_work=%p), wq=%p, func=%pf, ordered_func=%p," - " ordered_free=%p", + TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%pf ordered_func=%p " + "ordered_free=%p", __entry->work, __entry->normal_work, __entry->wq, __entry->func, __entry->ordered_func, __entry->ordered_free) ); @@ -1233,7 +1233,7 @@ DECLARE_EVENT_CLASS(btrfs__workqueue, __entry->high = high; ), - TP_printk_btrfs("name=%s%s, wq=%p", __get_str(name), + TP_printk_btrfs("name=%s%s wq=%p", __get_str(name), __print_flags(__entry->high, "", {(WQ_HIGHPRI), "-high"}), __entry->wq) @@ -1288,7 +1288,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_data_map, __entry->free_reserved = free_reserved; ), - TP_printk_btrfs("rootid=%llu, ino=%lu, free_reserved=%llu", + TP_printk_btrfs("rootid=%llu ino=%lu free_reserved=%llu", __entry->rootid, __entry->ino, __entry->free_reserved) ); @@ -1335,7 +1335,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data, __entry->op = op; ), - TP_printk_btrfs("root=%llu, ino=%lu, start=%llu, len=%llu, reserved=%llu, op=%s", + TP_printk_btrfs("root=%llu ino=%lu start=%llu len=%llu reserved=%llu op=%s", __entry->rootid, __entry->ino, __entry->start, __entry->len, __entry->reserved, __print_flags((unsigned long)__entry->op, "", @@ -1373,7 +1373,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref, __entry->reserved = reserved; ), - TP_printk_btrfs("root=%llu, reserved=%llu, op=free", + TP_printk_btrfs("root=%llu reserved=%llu op=free", __entry->ref_root, __entry->reserved) ); @@ -1400,7 +1400,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent, __entry->num_bytes = rec->num_bytes; ), - TP_printk_btrfs("bytenr = %llu, num_bytes = %llu", + TP_printk_btrfs("bytenr=%llu num_bytes=%llu", (unsigned long long)__entry->bytenr, (unsigned long long)__entry->num_bytes) ); @@ -1442,8 +1442,8 @@ TRACE_EVENT(btrfs_qgroup_account_extent, __entry->nr_new_roots = nr_new_roots; ), - TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, " - "nr_new_roots = %llu", + TP_printk_btrfs("bytenr=%llu num_bytes=%llu nr_old_roots=%llu " + "nr_new_roots=%llu", __entry->bytenr, __entry->num_bytes, __entry->nr_old_roots, @@ -1469,7 +1469,7 @@ TRACE_EVENT(qgroup_update_counters, __entry->cur_new_count = cur_new_count; ), - TP_printk_btrfs("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu", + TP_printk_btrfs("qgid=%llu cur_old_count=%llu cur_new_count=%llu", __entry->qgid, __entry->cur_old_count, __entry->cur_new_count) From fac69d0efad08fc15e4dbfc116830782acc0dc9a Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 7 Jan 2017 10:38:31 +0100 Subject: [PATCH 086/258] x86/boot: Add missing declaration of string functions Add the missing declarations of basic string functions to string.h to allow a clean build. Fixes: 5be865661516 ("String-handling functions for the new x86 setup code.") Signed-off-by: Nicholas Mc Guire Link: http://lkml.kernel.org/r/1483781911-21399-1-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- arch/x86/boot/string.c | 1 + arch/x86/boot/string.h | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index cc3bd583dce1..9e240fcba784 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -14,6 +14,7 @@ #include #include "ctype.h" +#include "string.h" int memcmp(const void *s1, const void *s2, size_t len) { diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 725e820602b1..113588ddb43f 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len); #define memset(d,c,l) __builtin_memset(d,c,l) #define memcmp __builtin_memcmp +extern int strcmp(const char *str1, const char *str2); +extern int strncmp(const char *cs, const char *ct, size_t count); +extern size_t strlen(const char *s); +extern char *strstr(const char *s1, const char *s2); +extern size_t strnlen(const char *s, size_t maxlen); +extern unsigned int atou(const char *s); +extern unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base); + #endif /* BOOT_STRING_H */ From 02c5c03283c52157d336abf5e44ffcda10579fbf Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 27 Dec 2016 12:05:05 +0800 Subject: [PATCH 087/258] ASoC: rt5645: set sel_i2s_pre_div1 to 2 The i2s clock pre-divider 1 is used for both i2s1 and sysclk. The i2s1 is usually used for the main i2s and the pre-divider will be set in hw_params function. However, if i2s2 is used, the pre-divider is not set in the hw_params function and the default value of i2s clock pre-divider 1 is too high for sysclk and DMIC usage. Fix by overriding default divider value to 2. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=95681 Tested-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 10c2a564a715..1ac96ef9ee20 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3833,6 +3833,9 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, } } + regmap_update_bits(rt5645->regmap, RT5645_ADDA_CLK1, + RT5645_I2S_PD1_MASK, RT5645_I2S_PD1_2); + if (rt5645->pdata.jd_invert) { regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV); From 4e2da44691cffbfffb1535f478d19bc2dca3e62b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:10 +0100 Subject: [PATCH 088/258] USB: serial: ch341: fix initial modem-control state DTR and RTS will be asserted by the tty-layer when the port is opened and deasserted on close (if HUPCL is set). Make sure the initial state is not-asserted before the port is first opened as well. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2597b83a8ae2..d133e72fe888 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -258,7 +258,6 @@ static int ch341_port_probe(struct usb_serial_port *port) spin_lock_init(&priv->lock); priv->baud_rate = DEFAULT_BAUD_RATE; - priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; r = ch341_configure(port->serial->dev, priv); if (r < 0) From a20047f36e2f6a1eea4f1fd261aaa55882369868 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:11 +0100 Subject: [PATCH 089/258] USB: serial: ch341: fix open and resume after B0 The private baud_rate variable is used to configure the port at open and reset-resume and must never be set to (and left at) zero or reset-resume and all further open attempts will fail. Fixes: aa91def41a7b ("USB: ch341: set tty baud speed according to tty struct") Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index d133e72fe888..6279df905c14 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -355,7 +355,6 @@ static void ch341_set_termios(struct tty_struct *tty, baud_rate = tty_get_baud_rate(tty); - priv->baud_rate = baud_rate; ctrl = CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX; switch (C_CSIZE(tty)) { @@ -388,6 +387,9 @@ static void ch341_set_termios(struct tty_struct *tty, spin_lock_irqsave(&priv->lock, flags); priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); spin_unlock_irqrestore(&priv->lock, flags); + + priv->baud_rate = baud_rate; + r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl); if (r < 0 && old_termios) { priv->baud_rate = tty_termios_baud_rate(old_termios); From 030ee7ae52a46a2be52ccc8242c4a330aba8d38e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:12 +0100 Subject: [PATCH 090/258] USB: serial: ch341: fix modem-control and B0 handling The modem-control signals are managed by the tty-layer during open and should not be asserted prematurely when set_termios is called from driver open. Also make sure that the signals are asserted only when changing speed from B0. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 6279df905c14..0cc5056b304d 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -384,10 +384,6 @@ static void ch341_set_termios(struct tty_struct *tty, ctrl |= CH341_LCR_STOP_BITS_2; if (baud_rate) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); - priv->baud_rate = baud_rate; r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl); @@ -395,14 +391,16 @@ static void ch341_set_termios(struct tty_struct *tty, priv->baud_rate = tty_termios_baud_rate(old_termios); tty_termios_copy_hw(&tty->termios, old_termios); } - } else { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); } - ch341_set_handshake(port->serial->dev, priv->line_control); + spin_lock_irqsave(&priv->lock, flags); + if (C_BAUD(tty) == B0) + priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); + else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) + priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); + spin_unlock_irqrestore(&priv->lock, flags); + ch341_set_handshake(port->serial->dev, priv->line_control); } static void ch341_break_ctl(struct tty_struct *tty, int break_state) From f2950b78547ffb8475297ada6b92bc2d774d5461 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:13 +0100 Subject: [PATCH 091/258] USB: serial: ch341: fix open error handling Make sure to stop the interrupt URB before returning on errors during open. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 0cc5056b304d..8f41d4385f1c 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -319,7 +319,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) r = ch341_configure(serial->dev, priv); if (r) - goto out; + return r; if (tty) ch341_set_termios(tty, port, NULL); @@ -329,12 +329,19 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) if (r) { dev_err(&port->dev, "%s - failed to submit interrupt urb: %d\n", __func__, r); - goto out; + return r; } r = usb_serial_generic_open(tty, port); + if (r) + goto err_kill_interrupt_urb; -out: return r; + return 0; + +err_kill_interrupt_urb: + usb_kill_urb(port->interrupt_in_urb); + + return r; } /* Old_termios contains the original termios settings and From ce5e292828117d1b71cbd3edf9e9137cf31acd30 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:14 +0100 Subject: [PATCH 092/258] USB: serial: ch341: fix resume after reset Fix reset-resume handling which failed to resubmit the read and interrupt URBs, thereby leaving a port that was open before suspend in a broken state until closed and reopened. Fixes: 1ded7ea47b88 ("USB: ch341 serial: fix port number changed after resume") Fixes: 2bfd1c96a9fb ("USB: serial: ch341: remove reset_resume callback") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 8f41d4385f1c..5343d65f3b52 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -582,14 +582,23 @@ static int ch341_tiocmget(struct tty_struct *tty) static int ch341_reset_resume(struct usb_serial *serial) { - struct ch341_private *priv; - - priv = usb_get_serial_port_data(serial->port[0]); + struct usb_serial_port *port = serial->port[0]; + struct ch341_private *priv = usb_get_serial_port_data(port); + int ret; /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); - return 0; + if (tty_port_initialized(&port->port)) { + ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); + if (ret) { + dev_err(&port->dev, "failed to submit interrupt urb: %d\n", + ret); + return ret; + } + } + + return usb_serial_generic_resume(serial); } static struct usb_serial_driver ch341_device = { From 3cca8624b6624e7ffb87dcd8a0a05bef9b50e97b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:15 +0100 Subject: [PATCH 093/258] USB: serial: ch341: fix line settings after reset-resume A recent change added support for modifying the default line-control settings, but did not make sure that the modified settings were used as part of reconfiguration after a device has been reset during resume. This caused a port that was open before suspend to be unusable until being closed and reopened. Fixes: ba781bdf8662 ("USB: serial: ch341: add support for parity, frame length, stop bits") Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 5343d65f3b52..eabdd05a2147 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -95,6 +95,7 @@ struct ch341_private { unsigned baud_rate; /* set baud rate */ u8 line_control; /* set line control value RTS/DTR */ u8 line_status; /* active status of modem control inputs */ + u8 lcr; }; static void ch341_set_termios(struct tty_struct *tty, @@ -232,7 +233,7 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) if (r < 0) goto out; - r = ch341_init_set_baudrate(dev, priv, 0); + r = ch341_init_set_baudrate(dev, priv, priv->lcr); if (r < 0) goto out; @@ -397,6 +398,8 @@ static void ch341_set_termios(struct tty_struct *tty, if (r < 0 && old_termios) { priv->baud_rate = tty_termios_baud_rate(old_termios); tty_termios_copy_hw(&tty->termios, old_termios); + } else if (r == 0) { + priv->lcr = ctrl; } } From 55fa15b5987db22b4f35d3f0798928c126be5f1c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:16 +0100 Subject: [PATCH 094/258] USB: serial: ch341: fix baud rate and line-control handling Revert to using direct register writes to set the divisor and line-control registers. A recent change switched to using the init vendor command to update these registers, something which also enabled support for CH341A devices. It turns out that simply setting bit 7 in the divisor register is sufficient to support CH341A and specifically prevent data from being buffered until a full endpoint-size packet (32 bytes) has been received. Using the init command also had the side-effect of temporarily deasserting the DTR/RTS signals on every termios change (including initialisation on open) something which for example could cause problems in setups where DTR is used to trigger a reset. Fixes: 4e46c410e050 ("USB: serial: ch341: reinitialize chip on reconfiguration") Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index eabdd05a2147..8d7b0847109b 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -133,8 +133,8 @@ static int ch341_control_in(struct usb_device *dev, return r; } -static int ch341_init_set_baudrate(struct usb_device *dev, - struct ch341_private *priv, unsigned ctrl) +static int ch341_set_baudrate_lcr(struct usb_device *dev, + struct ch341_private *priv, u8 lcr) { short a; int r; @@ -157,9 +157,19 @@ static int ch341_init_set_baudrate(struct usb_device *dev, factor = 0x10000 - factor; a = (factor & 0xff00) | divisor; - /* 0x9c is "enable SFR_UART Control register and timer" */ - r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT, - 0x9c | (ctrl << 8), a | 0x80); + /* + * CH341A buffers data until a full endpoint-size packet (32 bytes) + * has been received unless bit 7 is set. + */ + a |= BIT(7); + + r = ch341_control_out(dev, CH341_REQ_WRITE_REG, 0x1312, a); + if (r) + return r; + + r = ch341_control_out(dev, CH341_REQ_WRITE_REG, 0x2518, lcr); + if (r) + return r; return r; } @@ -233,7 +243,7 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) if (r < 0) goto out; - r = ch341_init_set_baudrate(dev, priv, priv->lcr); + r = ch341_set_baudrate_lcr(dev, priv, priv->lcr); if (r < 0) goto out; @@ -394,7 +404,7 @@ static void ch341_set_termios(struct tty_struct *tty, if (baud_rate) { priv->baud_rate = baud_rate; - r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl); + r = ch341_set_baudrate_lcr(port->serial->dev, priv, ctrl); if (r < 0 && old_termios) { priv->baud_rate = tty_termios_baud_rate(old_termios); tty_termios_copy_hw(&tty->termios, old_termios); From 5149fd327f16e393c1d04fa5325ab072c32472bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 13:36:30 -0800 Subject: [PATCH 095/258] xfs: bump up reserved blocks in xfs_alloc_set_aside Setting aside 4 blocks globally for bmbt splits isn't all that useful, as different threads can allocate space in parallel. Bump it to 4 blocks per AG to allow each thread that is currently doing an allocation to dip into it separately. Without that we may no have enough reserved blocks if there are enough parallel transactions in an almost out space file system that all run into bmap btree splits. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 5050056a0b06..0a46f8488b8d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -95,10 +95,7 @@ unsigned int xfs_alloc_set_aside( struct xfs_mount *mp) { - unsigned int blocks; - - blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); - return blocks; + return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); } /* From 255c516278175a6dc7037d1406307f35237d8688 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 13:36:19 -0800 Subject: [PATCH 096/258] xfs: fix bogus minleft manipulations We can't just set minleft to 0 when we're low on space - that's exactly what we need minleft for: to protect space in the AG for btree block allocations when we are low on free space. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 24 +++++++----------------- fs/xfs/libxfs/xfs_bmap.c | 3 --- fs/xfs/libxfs/xfs_bmap_btree.c | 3 +-- 3 files changed, 8 insertions(+), 22 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0a46f8488b8d..fe925702c955 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2635,12 +2635,10 @@ xfs_alloc_vextent( xfs_agblock_t agsize; /* allocation group size */ int error; int flags; /* XFS_ALLOC_FLAG_... locking flags */ - xfs_extlen_t minleft;/* minimum left value, temp copy */ xfs_mount_t *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ int bump_rotor = 0; - int no_min = 0; xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ mp = args->mp; @@ -2669,7 +2667,6 @@ xfs_alloc_vextent( trace_xfs_alloc_vextent_badargs(args); return 0; } - minleft = args->minleft; switch (type) { case XFS_ALLOCTYPE_THIS_AG: @@ -2680,9 +2677,7 @@ xfs_alloc_vextent( */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->pag = xfs_perag_get(mp, args->agno); - args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2747,9 +2742,7 @@ xfs_alloc_vextent( */ for (;;) { args->pag = xfs_perag_get(mp, args->agno); - if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2789,20 +2782,17 @@ xfs_alloc_vextent( * or switch to non-trylock mode. */ if (args->agno == sagno) { - if (no_min == 1) { + if (flags == 0) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_allfailed(args); break; } - if (flags == 0) { - no_min = 1; - } else { - flags = 0; - if (type == XFS_ALLOCTYPE_START_BNO) { - args->agbno = XFS_FSB_TO_AGBNO(mp, - args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - } + + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; } } xfs_perag_put(args->pag); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2760bc3b2536..44773c9eb957 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3812,7 +3812,6 @@ xfs_bmap_btalloc( args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; args.total = ap->minlen; - args.minleft = 0; if ((error = xfs_alloc_vextent(&args))) return error; ap->dfops->dop_low = true; @@ -4344,8 +4343,6 @@ xfs_bmapi_allocate( if (error) return error; - if (bma->dfops->dop_low) - bma->minleft = 0; if (bma->cur) bma->cur->bc_private.b.firstblock = *bma->firstblock; if (bma->blkno == NULLFSBLOCK) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index d6330c297ca0..d9be241fc86f 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -502,12 +502,11 @@ try_another_ag: if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if + * a full btree split. Try again and if * successful activate the lowspace algorithm. */ args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; error = xfs_alloc_vextent(&args); if (error) goto error0; From 54fee133ad59c87ab01dd84ab3e9397134b32acb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 13:44:30 -0800 Subject: [PATCH 097/258] xfs: adjust allocation length in xfs_alloc_space_available We must decide in xfs_alloc_fix_freelist if we can perform an allocation from a given AG is possible or not based on the available space, and should not fail the allocation past that point on a healthy file system. But currently we have two additional places that second-guess xfs_alloc_fix_freelist: xfs_alloc_ag_vextent tries to adjust the maxlen parameter to remove the reservation before doing the allocation (but ignores the various minium freespace requirements), and xfs_alloc_fix_minleft tries to fix up the allocated length after we've found an extent, but ignores the reservations and also doesn't take the AGFL into account (and thus fails allocations for not matching minlen in some cases). Remove all these later fixups and just correct the maxlen argument inside xfs_alloc_fix_freelist once we have the AGF buffer locked. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 81 ++++++++------------------------------- fs/xfs/libxfs/xfs_alloc.h | 2 +- 2 files changed, 18 insertions(+), 65 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index fe925702c955..f2e7eb6e5243 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -362,35 +362,11 @@ xfs_alloc_fix_len( return; ASSERT(rlen >= args->minlen && rlen <= args->maxlen); ASSERT(rlen % args->prod == args->mod); + ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= + rlen + args->minleft); args->len = rlen; } -/* - * Fix up length if there is too little space left in the a.g. - * Return 1 if ok, 0 if too little, should give up. - */ -STATIC int -xfs_alloc_fix_minleft( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_agf_t *agf; /* a.g. freelist header */ - int diff; /* free space difference */ - - if (args->minleft == 0) - return 1; - agf = XFS_BUF_TO_AGF(args->agbp); - diff = be32_to_cpu(agf->agf_freeblks) - - args->len - args->minleft; - if (diff >= 0) - return 1; - args->len += diff; /* shrink the allocated space */ - /* casts to (int) catch length underflows */ - if ((int)args->len >= (int)args->minlen) - return 1; - args->agbno = NULLAGBLOCK; - return 0; -} - /* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the @@ -686,8 +662,6 @@ xfs_alloc_ag_vextent( xfs_alloc_arg_t *args) /* argument structure for allocation */ { int error=0; - xfs_extlen_t reservation; - xfs_extlen_t oldmax; ASSERT(args->minlen > 0); ASSERT(args->maxlen > 0); @@ -695,20 +669,6 @@ xfs_alloc_ag_vextent( ASSERT(args->mod < args->prod); ASSERT(args->alignment > 0); - /* - * Clamp maxlen to the amount of free space minus any reservations - * that have been made. - */ - oldmax = args->maxlen; - reservation = xfs_ag_resv_needed(args->pag, args->resv); - if (args->maxlen > args->pag->pagf_freeblks - reservation) - args->maxlen = args->pag->pagf_freeblks - reservation; - if (args->maxlen == 0) { - args->agbno = NULLAGBLOCK; - args->maxlen = oldmax; - return 0; - } - /* * Branch to correct routine based on the type. */ @@ -728,8 +688,6 @@ xfs_alloc_ag_vextent( /* NOTREACHED */ } - args->maxlen = oldmax; - if (error || args->agbno == NULLAGBLOCK) return error; @@ -838,9 +796,6 @@ xfs_alloc_ag_vextent_exact( args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - args->agbno; xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto not_found; - ASSERT(args->agbno + args->len <= tend); /* @@ -1146,12 +1101,7 @@ restart: XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; - if (!xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_near_nominleft(args); - return 0; - } - blen = args->len; + /* * We are allocating starting at bnew for blen blocks. */ @@ -1343,12 +1293,6 @@ restart: */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) { - trace_xfs_alloc_near_nominleft(args); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - return 0; - } rlen = args->len; (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, args->datatype, ltbnoa, ltlena, <new); @@ -1550,8 +1494,6 @@ restart: } xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); /* @@ -2070,10 +2012,20 @@ xfs_alloc_space_available( /* do we have enough free space remaining for the allocation? */ available = (int)(pag->pagf_freeblks + pag->pagf_flcount - - reservation - min_free - args->total); - if (available < (int)args->minleft || available <= 0) + reservation - min_free - args->minleft); + if (available < (int)args->total) return false; + /* + * Clamp maxlen to the amount of free space available for the actual + * extent allocation. + */ + if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { + args->maxlen = available; + ASSERT(args->maxlen > 0); + ASSERT(args->maxlen >= args->minlen); + } + return true; } @@ -2119,7 +2071,8 @@ xfs_alloc_fix_freelist( } need = xfs_alloc_min_freelist(mp, pag); - if (!xfs_alloc_space_available(args, need, flags)) + if (!xfs_alloc_space_available(args, need, flags | + XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; /* diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7c404a6b0ae3..1d0f48a501a3 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ - +#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ /* * Argument structure for xfs_alloc routines. From 12ef830198b0d71668eb9b59f9ba69d32951a48a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 13:39:35 -0800 Subject: [PATCH 098/258] xfs: don't rely on ->total in xfs_alloc_space_available ->total is a bit of an odd parameter passed down to the low-level allocator all the way from the high-level callers. It's supposed to contain the maximum number of blocks to be allocated for the whole transaction [1]. But in xfs_iomap_write_allocate we only convert existing delayed allocations and thus only have a minimal block reservation for the current transaction, so xfs_alloc_space_available can't use it for the allocation decisions. Use the maximum of args->total and the calculated block requirement to make a decision. We probably should get rid of args->total eventually and instead apply ->minleft more broadly, but that will require some extensive changes all over. [1] which creates lots of confusion as most callers don't decrement it once doing a first allocation. But that's for a separate series. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index f2e7eb6e5243..9f06a211e157 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1995,7 +1995,7 @@ xfs_alloc_space_available( int flags) { struct xfs_perag *pag = args->pag; - xfs_extlen_t longest; + xfs_extlen_t alloc_len, longest; xfs_extlen_t reservation; /* blocks that are still reserved */ int available; @@ -2005,15 +2005,16 @@ xfs_alloc_space_available( reservation = xfs_ag_resv_needed(pag, args->resv); /* do we have enough contiguous free space for the allocation? */ + alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, reservation); - if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) + if (longest < alloc_len) return false; /* do we have enough free space remaining for the allocation? */ available = (int)(pag->pagf_freeblks + pag->pagf_flcount - reservation - min_free - args->minleft); - if (available < (int)args->total) + if (available < (int)max(args->total, alloc_len)) return false; /* From 84a4620cfe97c9d57e39b2369bfb77faff55063d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 13:41:33 -0800 Subject: [PATCH 099/258] xfs: don't print warnings when xfs_log_force fails There are only two reasons for xfs_log_force / xfs_log_force_lsn to fail: one is an I/O error, for which xlog_bdstrat already logs a warning, and the second is an already shutdown log due to a previous I/O errors. In the latter case we'll already have a previous indication for the actual error, but the large stream of misleading warnings from xfs_log_force will probably scroll it out of the message buffer. Simply removing the warnings thus makes the XFS log reporting significantly better. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c39ac14ff540..b1469f0a91a6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3317,12 +3317,8 @@ xfs_log_force( xfs_mount_t *mp, uint flags) { - int error; - trace_xfs_log_force(mp, 0, _RET_IP_); - error = _xfs_log_force(mp, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force(mp, flags, NULL); } /* @@ -3466,12 +3462,8 @@ xfs_log_force_lsn( xfs_lsn_t lsn, uint flags) { - int error; - trace_xfs_log_force(mp, lsn, _RET_IP_); - error = _xfs_log_force_lsn(mp, lsn, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force_lsn(mp, lsn, flags, NULL); } /* From 32cd7cbbacf700885a2316275f188f2d5739b5f4 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 6 Jan 2017 19:31:35 -0500 Subject: [PATCH 100/258] md/raid5: Use correct IS_ERR() variation on pointer check This fixes a build error on certain architectures, such as ppc64. Fixes: 6995f0b247e("md: takeover should clear unrelated bits") Signed-off-by: Jes Sorensen Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7b1da6e95a56..36c13e4be9c9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7831,7 +7831,7 @@ static void *raid5_takeover_raid1(struct mddev *mddev) mddev->new_chunk_sectors = chunksect; ret = setup_conf(mddev); - if (!IS_ERR_VALUE(ret)) + if (!IS_ERR(ret)) mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); return ret; From 5dedade6dfa243c130b85d1e4daba6f027805033 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 9 Jan 2017 12:41:43 +0100 Subject: [PATCH 101/258] x86/CPU: Add native CPUID variants returning a single datum ... similarly to the cpuid_() variants. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170109114147.5082-2-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index eaf100508c36..1be64da0384e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -219,6 +219,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, : "memory"); } +#define native_cpuid_reg(reg) \ +static inline unsigned int native_cpuid_##reg(unsigned int op) \ +{ \ + unsigned int eax = op, ebx, ecx = 0, edx; \ + \ + native_cpuid(&eax, &ebx, &ecx, &edx); \ + \ + return reg; \ +} + +/* + * Native CPUID functions returning a single datum. + */ +native_cpuid_reg(eax) +native_cpuid_reg(ebx) +native_cpuid_reg(ecx) +native_cpuid_reg(edx) + static inline void load_cr3(pgd_t *pgdir) { write_cr3(__pa(pgdir)); From f3e2a51f568d9f33370f4e8bb05669a34223241a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 9 Jan 2017 12:41:44 +0100 Subject: [PATCH 102/258] x86/microcode: Use native CPUID to tickle out microcode revision Intel supplies the microcode revision value in MSR 0x8b (IA32_BIOS_SIGN_ID) after CPUID(1) has been executed. Execute it each time before reading that MSR. It used to do sync_core() which did do CPUID but c198b121b1a1 ("x86/asm: Rewrite sync_core() to use IRET-to-self") changed the sync_core() implementation so we better make the microcode loading case explicit, as the SDM documents it. Reported-and-tested-by: Jun'ichi Nomura Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170109114147.5082-3-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/cpu/microcode/intel.c | 26 +++----------------------- 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcd484d2bb03..2d49aa949fa1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -83,7 +83,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* Required by the SDM */ - sync_core(); + native_cpuid_eax(1); rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index b624b54912e1..f79249fab389 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -368,26 +368,6 @@ next: return patch; } -static void cpuid_1(void) -{ - /* - * According to the Intel SDM, Volume 3, 9.11.7: - * - * CPUID returns a value in a model specific register in - * addition to its usual register return values. The - * semantics of CPUID cause it to deposit an update ID value - * in the 64-bit model-specific register at address 08BH - * (IA32_BIOS_SIGN_ID). If no update is present in the - * processor, the value in the MSR remains unmodified. - * - * Use native_cpuid -- this code runs very early and we don't - * want to mess with paravirt. - */ - unsigned int eax = 1, ebx, ecx = 0, edx; - - native_cpuid(&eax, &ebx, &ecx, &edx); -} - static int collect_cpu_info_early(struct ucode_cpu_info *uci) { unsigned int val[2]; @@ -413,7 +393,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ - cpuid_1(); + native_cpuid_eax(1); /* get the current revision from MSR 0x8B */ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); @@ -613,7 +593,7 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ - cpuid_1(); + native_cpuid_eax(1); /* get the current revision from MSR 0x8B */ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); @@ -825,7 +805,7 @@ static int apply_microcode_intel(int cpu) wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ - cpuid_1(); + native_cpuid_eax(1); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); From 4167709bbf826512a52ebd6aafda2be104adaec9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 9 Jan 2017 12:41:45 +0100 Subject: [PATCH 103/258] x86/microcode/intel: Add a helper which gives the microcode revision Since on Intel we're required to do CPUID(1) first, before reading the microcode revision MSR, let's add a special helper which does the required steps so that we don't forget to do them next time, when we want to read the microcode revision. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170109114147.5082-4-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/microcode_intel.h | 15 +++++++++ arch/x86/kernel/cpu/intel.c | 11 ++----- arch/x86/kernel/cpu/microcode/intel.c | 43 ++++++++------------------ 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 195becc6f780..e793fc9a9b20 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -52,6 +52,21 @@ struct extended_sigtable { #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) +static inline u32 intel_get_microcode_revision(void) +{ + u32 rev, dummy; + + native_wrmsrl(MSR_IA32_UCODE_REV, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); + + return rev; +} + #ifdef CONFIG_MICROCODE_INTEL extern void __init load_ucode_intel_bsp(void); extern void load_ucode_intel_ap(void); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 2d49aa949fa1..203f860d2ab3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -78,14 +79,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) { - unsigned lower_word; - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* Required by the SDM */ - native_cpuid_eax(1); - rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); - } + if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) + c->microcode = intel_get_microcode_revision(); /* * Atom erratum AAE44/AAF40/AAG38/AAH41: diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f79249fab389..faec8fa68ffd 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -390,15 +390,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); csig.pf = 1 << ((val[1] >> 18) & 7); } - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - /* As documented in the SDM: Do a CPUID 1 here */ - native_cpuid_eax(1); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - csig.rev = val[1]; + csig.rev = intel_get_microcode_revision(); uci->cpu_sig = csig; uci->valid = 1; @@ -582,7 +575,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci) static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) { struct microcode_intel *mc; - unsigned int val[2]; + u32 rev; mc = uci->mc; if (!mc) @@ -590,21 +583,16 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - /* As documented in the SDM: Do a CPUID 1 here */ - native_cpuid_eax(1); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc->hdr.rev) + rev = intel_get_microcode_revision(); + if (rev != mc->hdr.rev) return -1; #ifdef CONFIG_X86_64 /* Flush global tlb. This is precaution. */ flush_tlb_early(); #endif - uci->cpu_sig.rev = val[1]; + uci->cpu_sig.rev = rev; if (early) print_ucode(uci); @@ -784,8 +772,8 @@ static int apply_microcode_intel(int cpu) struct microcode_intel *mc; struct ucode_cpu_info *uci; struct cpuinfo_x86 *c; - unsigned int val[2]; static int prev_rev; + u32 rev; /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) @@ -802,33 +790,28 @@ static int apply_microcode_intel(int cpu) /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); - wrmsrl(MSR_IA32_UCODE_REV, 0); - /* As documented in the SDM: Do a CPUID 1 here */ - native_cpuid_eax(1); + rev = intel_get_microcode_revision(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - if (val[1] != mc->hdr.rev) { + if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); return -1; } - if (val[1] != prev_rev) { + if (rev != prev_rev) { pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", - val[1], + rev, mc->hdr.date & 0xffff, mc->hdr.date >> 24, (mc->hdr.date >> 16) & 0xff); - prev_rev = val[1]; + prev_rev = rev; } c = &cpu_data(cpu); - uci->cpu_sig.rev = val[1]; - c->microcode = val[1]; + uci->cpu_sig.rev = rev; + c->microcode = rev; return 0; } From 9fcf5ba2ef908af916e9002891fbbca20ce4dc98 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Mon, 9 Jan 2017 12:41:46 +0100 Subject: [PATCH 104/258] x86/microcode/intel: Fix allocation size of struct ucode_patch We allocate struct ucode_patch here. @size is the size of microcode data and used for kmemdup() later in this function. Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading") Signed-off-by: Jun'ichi Nomura Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/7a730dc9-ac17-35c4-fe76-dfc94e5ecd95@ce.jp.nec.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index faec8fa68ffd..943486589757 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -150,7 +150,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size) { struct ucode_patch *p; - p = kzalloc(size, GFP_KERNEL); + p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); From 2e86222c67bb5d942da68e8415749b32db208534 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Mon, 9 Jan 2017 12:41:47 +0100 Subject: [PATCH 105/258] x86/microcode/intel: Use correct buffer size for saving microcode data In generic_load_microcode(), curr_mc_size is the size of the last allocated buffer and since we have this performance "optimization" there to vmalloc a new buffer only when the current one is bigger, curr_mc_size ends up becoming the size of the biggest buffer we've seen so far. However, we end up saving the microcode patch which matches our CPU and its size is not curr_mc_size but the respective mc_size during the iteration while we're staring at it. So save that mc_size into a separate variable and use it to store the previously found microcode buffer. Without this fix, we could get oops like this: BUG: unable to handle kernel paging request at ffffc9000e30f000 IP: __memcpy+0x12/0x20 ... Call Trace: ? kmemdup+0x43/0x60 __alloc_microcode_buf+0x44/0x70 save_microcode_patch+0xd4/0x150 generic_load_microcode+0x1b8/0x260 request_microcode_user+0x15/0x20 microcode_write+0x91/0x100 __vfs_write+0x34/0x120 vfs_write+0xc1/0x130 SyS_write+0x56/0xc0 do_syscall_64+0x6c/0x160 entry_SYSCALL64_slow_path+0x25/0x25 Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading") Signed-off-by: Jun'ichi Nomura Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/4f33cbfd-44f2-9bed-3b66-7446cd14256f@ce.jp.nec.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/intel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 943486589757..3f329b74e040 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -823,7 +823,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; int new_rev = uci->cpu_sig.rev; unsigned int leftover = size; - unsigned int curr_mc_size = 0; + unsigned int curr_mc_size = 0, new_mc_size = 0; unsigned int csig, cpf; while (leftover) { @@ -864,6 +864,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, vfree(new_mc); new_rev = mc_header.rev; new_mc = mc; + new_mc_size = mc_size; mc = NULL; /* trigger new vmalloc */ } @@ -889,7 +890,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, curr_mc_size); + save_mc_for_early(new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); From 3895dbf8985f656675b5bde610723a29cbce3fa7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Jan 2017 14:18:43 +1300 Subject: [PATCH 106/258] mnt: Protect the mountpoint hashtable with mount_lock Protecting the mountpoint hashtable with namespace_sem was sufficient until a call to umount_mnt was added to mntput_no_expire. At which point it became possible for multiple calls of put_mountpoint on the same hash chain to happen on the same time. Kristen Johansen reported: > This can cause a panic when simultaneous callers of put_mountpoint > attempt to free the same mountpoint. This occurs because some callers > hold the mount_hash_lock, while others hold the namespace lock. Some > even hold both. > > In this submitter's case, the panic manifested itself as a GP fault in > put_mountpoint() when it called hlist_del() and attempted to dereference > a m_hash.pprev that had been poisioned by another thread. Al Viro observed that the simple fix is to switch from using the namespace_sem to the mount_lock to protect the mountpoint hash table. I have taken Al's suggested patch moved put_mountpoint in pivot_root (instead of taking mount_lock an additional time), and have replaced new_mountpoint with get_mountpoint a function that does the hash table lookup and addition under the mount_lock. The introduction of get_mounptoint ensures that only the mount_lock is needed to manipulate the mountpoint hashtable. d_set_mounted is modified to only set DCACHE_MOUNTED if it is not already set. This allows get_mountpoint to use the setting of DCACHE_MOUNTED to ensure adding a struct mountpoint for a dentry happens exactly once. Cc: stable@vger.kernel.org Fixes: ce07d891a089 ("mnt: Honor MNT_LOCKED when detaching mounts") Reported-by: Krister Johansen Suggested-by: Al Viro Acked-by: Al Viro Signed-off-by: "Eric W. Biederman" --- fs/dcache.c | 7 ++++-- fs/namespace.c | 68 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 769903dbc19d..95d71eda8142 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1336,8 +1336,11 @@ int d_set_mounted(struct dentry *dentry) } spin_lock(&dentry->d_lock); if (!d_unlinked(dentry)) { - dentry->d_flags |= DCACHE_MOUNTED; - ret = 0; + ret = -EBUSY; + if (!d_mountpoint(dentry)) { + dentry->d_flags |= DCACHE_MOUNTED; + ret = 0; + } } spin_unlock(&dentry->d_lock); out: diff --git a/fs/namespace.c b/fs/namespace.c index b5b1259e064f..487ba30bb5c6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -742,26 +742,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) return NULL; } -static struct mountpoint *new_mountpoint(struct dentry *dentry) +static struct mountpoint *get_mountpoint(struct dentry *dentry) { - struct hlist_head *chain = mp_hash(dentry); - struct mountpoint *mp; + struct mountpoint *mp, *new = NULL; int ret; - mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); - if (!mp) - return ERR_PTR(-ENOMEM); - - ret = d_set_mounted(dentry); - if (ret) { - kfree(mp); - return ERR_PTR(ret); + if (d_mountpoint(dentry)) { +mountpoint: + read_seqlock_excl(&mount_lock); + mp = lookup_mountpoint(dentry); + read_sequnlock_excl(&mount_lock); + if (mp) + goto done; } - mp->m_dentry = dentry; - mp->m_count = 1; - hlist_add_head(&mp->m_hash, chain); - INIT_HLIST_HEAD(&mp->m_list); + if (!new) + new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); + if (!new) + return ERR_PTR(-ENOMEM); + + + /* Exactly one processes may set d_mounted */ + ret = d_set_mounted(dentry); + + /* Someone else set d_mounted? */ + if (ret == -EBUSY) + goto mountpoint; + + /* The dentry is not available as a mountpoint? */ + mp = ERR_PTR(ret); + if (ret) + goto done; + + /* Add the new mountpoint to the hash table */ + read_seqlock_excl(&mount_lock); + new->m_dentry = dentry; + new->m_count = 1; + hlist_add_head(&new->m_hash, mp_hash(dentry)); + INIT_HLIST_HEAD(&new->m_list); + read_sequnlock_excl(&mount_lock); + + mp = new; + new = NULL; +done: + kfree(new); return mp; } @@ -1595,11 +1619,11 @@ void __detach_mounts(struct dentry *dentry) struct mount *mnt; namespace_lock(); + lock_mount_hash(); mp = lookup_mountpoint(dentry); if (IS_ERR_OR_NULL(mp)) goto out_unlock; - lock_mount_hash(); event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); @@ -1609,9 +1633,9 @@ void __detach_mounts(struct dentry *dentry) } else umount_tree(mnt, UMOUNT_CONNECTED); } - unlock_mount_hash(); put_mountpoint(mp); out_unlock: + unlock_mount_hash(); namespace_unlock(); } @@ -2038,9 +2062,7 @@ retry: namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = lookup_mountpoint(dentry); - if (!mp) - mp = new_mountpoint(dentry); + struct mountpoint *mp = get_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); inode_unlock(dentry->d_inode); @@ -2059,7 +2081,11 @@ retry: static void unlock_mount(struct mountpoint *where) { struct dentry *dentry = where->m_dentry; + + read_seqlock_excl(&mount_lock); put_mountpoint(where); + read_sequnlock_excl(&mount_lock); + namespace_unlock(); inode_unlock(dentry->d_inode); } @@ -3135,9 +3161,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); /* A moved mount should not expire automatically */ list_del_init(&new_mnt->mnt_expire); + put_mountpoint(root_mp); unlock_mount_hash(); chroot_fs_refs(&root, &new); - put_mountpoint(root_mp); error = 0; out4: unlock_mount(old_mp); From 75422726b0f717d67db3283c2eb5bc14fa2619c5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Jan 2017 17:37:27 +1300 Subject: [PATCH 107/258] libfs: Modify mount_pseudo_xattr to be clear it is not a userspace mount Add MS_KERNMOUNT to the flags that are passed. Use sget_userns and force &init_user_ns instead of calling sget so that even if called from a weird context the internal filesystem will be considered to be in the intial user namespace. Luis Ressel reported that the the failure to pass MS_KERNMOUNT into mount_pseudo broke his in development graphics driver that uses the generic drm infrastructure. I am not certain the deriver was bug free in it's usage of that infrastructure but since mount_pseudo_xattr can never be triggered by userspace it is clearer and less error prone, and less problematic for the code to be explicit. Reported-by: Luis Ressel Tested-by: Luis Ressel Acked-by: Al Viro Signed-off-by: "Eric W. Biederman" --- fs/libfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/libfs.c b/fs/libfs.c index e973cd51f126..28d6f35feed6 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); - s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL); + s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, + &init_user_ns, NULL); if (IS_ERR(s)) return ERR_CAST(s); From add7c65ca426b7a37184dd3d2172394e23d585d6 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 4 Jan 2017 19:28:14 -0800 Subject: [PATCH 108/258] pid: fix lockdep deadlock warning due to ucount_lock ========================================================= [ INFO: possible irq lock inversion dependency detected ] 4.10.0-rc2-00024-g4aecec9-dirty #118 Tainted: G W --------------------------------------------------------- swapper/1/0 just changed the state of lock: (&(&sighand->siglock)->rlock){-.....}, at: [] __lock_task_sighand+0xb6/0x2c0 but this lock took another, HARDIRQ-unsafe lock in the past: (ucounts_lock){+.+...} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Chain exists of: &(&sighand->siglock)->rlock --> &(&tty->ctrl_lock)->rlock --> ucounts_lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(ucounts_lock); local_irq_disable(); lock(&(&sighand->siglock)->rlock); lock(&(&tty->ctrl_lock)->rlock); lock(&(&sighand->siglock)->rlock); *** DEADLOCK *** This patch removes a dependency between rlock and ucount_lock. Fixes: f333c700c610 ("pidns: Add a limit on the number of pid namespaces") Cc: stable@vger.kernel.org Signed-off-by: Andrei Vagin Acked-by: Al Viro Signed-off-by: Eric W. Biederman --- kernel/pid_namespace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index df9e8e9e0be7..eef2ce968636 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -151,8 +151,12 @@ out: static void delayed_free_pidns(struct rcu_head *p) { - kmem_cache_free(pid_ns_cachep, - container_of(p, struct pid_namespace, rcu)); + struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu); + + dec_pid_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); + + kmem_cache_free(pid_ns_cachep, ns); } static void destroy_pid_namespace(struct pid_namespace *ns) @@ -162,8 +166,6 @@ static void destroy_pid_namespace(struct pid_namespace *ns) ns_free_inum(&ns->ns); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); - dec_pid_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); call_rcu(&ns->rcu, delayed_free_pidns); } From 93362fa47fe98b62e4a34ab408c4a418432e7939 Mon Sep 17 00:00:00 2001 From: Zhou Chengming Date: Fri, 6 Jan 2017 09:32:32 +0800 Subject: [PATCH 109/258] sysctl: Drop reference added by grab_header in proc_sys_readdir Fixes CVE-2016-9191, proc_sys_readdir doesn't drop reference added by grab_header when return from !dir_emit_dots path. It can cause any path called unregister_sysctl_table will wait forever. The calltrace of CVE-2016-9191: [ 5535.960522] Call Trace: [ 5535.963265] [] schedule+0x3f/0xa0 [ 5535.968817] [] schedule_timeout+0x3db/0x6f0 [ 5535.975346] [] ? wait_for_completion+0x45/0x130 [ 5535.982256] [] wait_for_completion+0xc3/0x130 [ 5535.988972] [] ? wake_up_q+0x80/0x80 [ 5535.994804] [] drop_sysctl_table+0xc4/0xe0 [ 5536.001227] [] drop_sysctl_table+0x77/0xe0 [ 5536.007648] [] unregister_sysctl_table+0x4d/0xa0 [ 5536.014654] [] unregister_sysctl_table+0x7f/0xa0 [ 5536.021657] [] unregister_sched_domain_sysctl+0x15/0x40 [ 5536.029344] [] partition_sched_domains+0x44/0x450 [ 5536.036447] [] ? __mutex_unlock_slowpath+0x111/0x1f0 [ 5536.043844] [] rebuild_sched_domains_locked+0x64/0xb0 [ 5536.051336] [] update_flag+0x11d/0x210 [ 5536.057373] [] ? mutex_lock_nested+0x2df/0x450 [ 5536.064186] [] ? cpuset_css_offline+0x1b/0x60 [ 5536.070899] [] ? trace_hardirqs_on+0xd/0x10 [ 5536.077420] [] ? mutex_lock_nested+0x2df/0x450 [ 5536.084234] [] ? css_killed_work_fn+0x25/0x220 [ 5536.091049] [] cpuset_css_offline+0x35/0x60 [ 5536.097571] [] css_killed_work_fn+0x5c/0x220 [ 5536.104207] [] process_one_work+0x1df/0x710 [ 5536.110736] [] ? process_one_work+0x160/0x710 [ 5536.117461] [] worker_thread+0x12b/0x4a0 [ 5536.123697] [] ? process_one_work+0x710/0x710 [ 5536.130426] [] kthread+0xfe/0x120 [ 5536.135991] [] ret_from_fork+0x1f/0x40 [ 5536.142041] [] ? kthread_create_on_node+0x230/0x230 One cgroup maintainer mentioned that "cgroup is trying to offline a cpuset css, which takes place under cgroup_mutex. The offlining ends up trying to drain active usages of a sysctl table which apprently is not happening." The real reason is that proc_sys_readdir doesn't drop reference added by grab_header when return from !dir_emit_dots path. So this cpuset offline path will wait here forever. See here for details: http://www.openwall.com/lists/oss-security/2016/11/04/13 Fixes: f0c3b5093add ("[readdir] convert procfs") Cc: stable@vger.kernel.org Reported-by: CAI Qian Tested-by: Yang Shukui Signed-off-by: Zhou Chengming Acked-by: Al Viro Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 55313d994895..d4e37acd4821 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) ctl_dir = container_of(head, struct ctl_dir, header); if (!dir_emit_dots(file, ctx)) - return 0; + goto out; pos = 2; @@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) break; } } +out: sysctl_head_finish(head); return 0; } From 21d25f6a4217e755906cb548b55ddab39d0e88b9 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 4 Jan 2017 01:22:52 -0800 Subject: [PATCH 110/258] dmaengine: iota: ioat_alloc_chan_resources should not perform sleeping allocations. On a kernel with DEBUG_LOCKS, ioat_free_chan_resources triggers an in_interrupt() warning. With PROVE_LOCKING, it reports detecting a SOFTIRQ-safe to SOFTIRQ-unsafe lock ordering in the same code path. This is because dma_generic_alloc_coherent() checks if the GFP flags permit blocking. It allocates from different subsystems if blocking is permitted. The free path knows how to return the memory to the correct allocator. If GFP_KERNEL is specified then the alloc and free end up going through cma_alloc(), which uses mutexes. Given that ioat_free_chan_resources() can be called in interrupt context, ioat_alloc_chan_resources() must specify GFP_NOWAIT so that the allocations do not block and instead use an allocator that uses spinlocks. Signed-off-by: Krister Johansen Acked-by: Dave Jiang Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index ace5cb2cb12f..cc5259b881d4 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -700,7 +700,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c) /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ ioat_chan->completion = dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool, - GFP_KERNEL, &ioat_chan->completion_dma); + GFP_NOWAIT, &ioat_chan->completion_dma); if (!ioat_chan->completion) return -ENOMEM; @@ -710,7 +710,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c) ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); order = IOAT_MAX_ORDER; - ring = ioat_alloc_ring(c, order, GFP_KERNEL); + ring = ioat_alloc_ring(c, order, GFP_NOWAIT); if (!ring) return -ENOMEM; From 527a27591312e4b3a0f8179f321f9e85c0850df0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 9 Jan 2017 16:50:52 +0200 Subject: [PATCH 111/258] dmaengine: omap-dma: Fix the port_window support We do not yet have users of port_window. The following errors were found when converting the tusb6010_omap.c musb driver: - The peripheral side must have SRC_/DST_PACKED disabled - when configuring the burst for the peripheral side the memory side configuration were overwritten: d->csdp = ... -> d->csdp |= ... - The EI and FI were configured for the wrong sides of the transfers. With these changes and the converted tus6010_omap.c I was able to verify that things are working as they expected to work. Fixes: 201ac4861c19 ("dmaengine: omap-dma: Support for slave devices with data port window") Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- drivers/dma/omap-dma.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 4ad101a47e0a..daf479cce691 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -938,6 +938,23 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( d->ccr |= CCR_DST_AMODE_POSTINC; if (port_window) { d->ccr |= CCR_SRC_AMODE_DBLIDX; + + if (port_window_bytes >= 64) + d->csdp |= CSDP_SRC_BURST_64; + else if (port_window_bytes >= 32) + d->csdp |= CSDP_SRC_BURST_32; + else if (port_window_bytes >= 16) + d->csdp |= CSDP_SRC_BURST_16; + + } else { + d->ccr |= CCR_SRC_AMODE_CONSTANT; + } + } else { + d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED; + + d->ccr |= CCR_SRC_AMODE_POSTINC; + if (port_window) { + d->ccr |= CCR_DST_AMODE_DBLIDX; d->ei = 1; /* * One frame covers the port_window and by configure @@ -948,27 +965,11 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( d->fi = -(port_window_bytes - 1); if (port_window_bytes >= 64) - d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED; + d->csdp |= CSDP_DST_BURST_64; else if (port_window_bytes >= 32) - d->csdp = CSDP_SRC_BURST_32 | CSDP_SRC_PACKED; + d->csdp |= CSDP_DST_BURST_32; else if (port_window_bytes >= 16) - d->csdp = CSDP_SRC_BURST_16 | CSDP_SRC_PACKED; - } else { - d->ccr |= CCR_SRC_AMODE_CONSTANT; - } - } else { - d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED; - - d->ccr |= CCR_SRC_AMODE_POSTINC; - if (port_window) { - d->ccr |= CCR_DST_AMODE_DBLIDX; - - if (port_window_bytes >= 64) - d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED; - else if (port_window_bytes >= 32) - d->csdp = CSDP_DST_BURST_32 | CSDP_DST_PACKED; - else if (port_window_bytes >= 16) - d->csdp = CSDP_DST_BURST_16 | CSDP_DST_PACKED; + d->csdp |= CSDP_DST_BURST_16; } else { d->ccr |= CCR_DST_AMODE_CONSTANT; } @@ -1017,7 +1018,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( osg->addr = sg_dma_address(sgent); osg->en = en; osg->fn = sg_dma_len(sgent) / frame_bytes; - if (port_window && dir == DMA_MEM_TO_DEV) { + if (port_window && dir == DMA_DEV_TO_MEM) { osg->ei = 1; /* * One frame covers the port_window and by configure From 497de07d89c1410d76a15bec2bb41f24a2a89f31 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 9 Jan 2017 09:34:48 +0800 Subject: [PATCH 112/258] tmpfs: clear S_ISGID when setting posix ACLs This change was missed the tmpfs modification in In CVE-2016-7097 commit 073931017b49 ("posix_acl: Clear SGID bit when setting file permissions") It can test by xfstest generic/375, which failed to clear setgid bit in the following test case on tmpfs: touch $testfile chown 100:100 $testfile chmod 2755 $testfile _runas -u 100 -g 101 -- setfacl -m u::rwx,g::rwx,o::rwx $testfile Signed-off-by: Gu Zheng Signed-off-by: Al Viro --- fs/posix_acl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 595522022aca..c9d48dc78495 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -922,11 +922,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type) int error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return 0; - if (error == 0) - acl = NULL; + error = posix_acl_update_mode(inode, + &inode->i_mode, &acl); + if (error) + return error; } inode->i_ctime = current_time(inode); From 2e40795c3bf344cfb5220d94566205796e3ef19a Mon Sep 17 00:00:00 2001 From: Dennis Kadioglu Date: Mon, 9 Jan 2017 17:10:46 +0100 Subject: [PATCH 113/258] ALSA: usb-audio: Add a quirk for Plantronics BT600 Plantronics BT600 does not support reading the sample rate which leads to many lines of "cannot get freq at ep 0x1" and "cannot get freq at ep 0x82". This patch adds the USB ID of the BT600 to quirks.c and avoids those error messages. Signed-off-by: Dennis Kadioglu Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index b3fd2382fdd9..eb4b9f7a571e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1135,6 +1135,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0x02F7): /* Plantronics BT-600 */ case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ From 19a91dd4e39e755d650444da7f3a571b40a11093 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Fri, 23 Dec 2016 16:01:08 +0100 Subject: [PATCH 114/258] MMC: meson: avoid possible NULL dereference No actual segmentation faults were observed but the coding is at least inconsistent. irqreturn_t meson_mmc_irq(): We should not dereference host before checking it. meson_mmc_irq_thread(): If cmd or mrq are NULL we should not dereference them after writing a warning. Fixes: 51c5d8447bd7 MMC: meson: initial support for GX platforms Signed-off-by: Heinrich Schuchardt Acked-by: Kevin Hilman Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index b352760c041e..09739352834c 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -578,13 +578,15 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) { struct meson_host *host = dev_id; struct mmc_request *mrq; - struct mmc_command *cmd = host->cmd; + struct mmc_command *cmd; u32 irq_en, status, raw_status; irqreturn_t ret = IRQ_HANDLED; if (WARN_ON(!host)) return IRQ_NONE; + cmd = host->cmd; + mrq = host->mrq; if (WARN_ON(!mrq)) @@ -670,10 +672,10 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) int ret = IRQ_HANDLED; if (WARN_ON(!mrq)) - ret = IRQ_NONE; + return IRQ_NONE; if (WARN_ON(!cmd)) - ret = IRQ_NONE; + return IRQ_NONE; data = cmd->data; if (data) { From 146cc8a17a3b4996f6805ee5c080e7101277c410 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Jan 2017 12:05:37 +0100 Subject: [PATCH 115/258] USB: serial: kl5kusb105: fix line-state error handling The current implementation failed to detect short transfers when attempting to read the line state, and also, to make things worse, logged the content of the uninitialised heap transfer buffer. Fixes: abf492e7b3ae ("USB: kl5kusb105: fix DMA buffers on stack") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/kl5kusb105.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 0ee190fc1bf8..6cb45757818f 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -192,10 +192,11 @@ static int klsi_105_get_line_state(struct usb_serial_port *port, status_buf, KLSI_STATUSBUF_LEN, 10000 ); - if (rc < 0) - dev_err(&port->dev, "Reading line status failed (error = %d)\n", - rc); - else { + if (rc != KLSI_STATUSBUF_LEN) { + dev_err(&port->dev, "reading line status failed: %d\n", rc); + if (rc >= 0) + rc = -EIO; + } else { status = get_unaligned_le16(status_buf); dev_info(&port->serial->dev->dev, "read status %x %x\n", From 620f1a632ebcc9811c2f8009ba52297c7006f805 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Dec 2016 18:50:13 -0800 Subject: [PATCH 116/258] wusbcore: Fix one more crypto-on-the-stack bug The driver put a constant buffer of all zeros on the stack and pointed a scatterlist entry at it. This doesn't work with virtual stacks. Use ZERO_PAGE instead. Cc: stable@vger.kernel.org # 4.9 only Reported-by: Eric Biggers Signed-off-by: Andy Lutomirski Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/crypto.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index 79451f7ef1b7..062c205f0046 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -216,7 +216,6 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, struct scatterlist sg[4], sg_dst; void *dst_buf; size_t dst_size; - const u8 bzero[16] = { 0 }; u8 iv[crypto_skcipher_ivsize(tfm_cbc)]; size_t zero_padding; @@ -261,7 +260,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, sg_set_buf(&sg[1], &scratch->b1, sizeof(scratch->b1)); sg_set_buf(&sg[2], b, blen); /* 0 if well behaved :) */ - sg_set_buf(&sg[3], bzero, zero_padding); + sg_set_page(&sg[3], ZERO_PAGE(0), zero_padding, 0); sg_init_one(&sg_dst, dst_buf, dst_size); skcipher_request_set_tfm(req, tfm_cbc); From e864212078ded276bdb272b2e0ee6a979357ca8a Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 23 Dec 2016 11:37:53 +0100 Subject: [PATCH 117/258] target: add XCOPY target/segment desc sense codes As defined in http://www.t10.org/lists/asc-num.htm. To be used during validation of XCOPY target and segment descriptor lists. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_transport.c | 24 ++++++++++++++++++++++++ include/target/target_core_base.h | 4 ++++ 2 files changed, 28 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 7dfefd66df93..1cadc9eefa21 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1693,6 +1693,10 @@ void transport_generic_request_failure(struct se_cmd *cmd, case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED: case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED: case TCM_COPY_TARGET_DEVICE_NOT_REACHABLE: + case TCM_TOO_MANY_TARGET_DESCS: + case TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE: + case TCM_TOO_MANY_SEGMENT_DESCS: + case TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE: break; case TCM_OUT_OF_RESOURCES: sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -2808,6 +2812,26 @@ static const struct sense_info sense_info_table[] = { .key = ILLEGAL_REQUEST, .asc = 0x26, /* INVALID FIELD IN PARAMETER LIST */ }, + [TCM_TOO_MANY_TARGET_DESCS] = { + .key = ILLEGAL_REQUEST, + .asc = 0x26, + .ascq = 0x06, /* TOO MANY TARGET DESCRIPTORS */ + }, + [TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE] = { + .key = ILLEGAL_REQUEST, + .asc = 0x26, + .ascq = 0x07, /* UNSUPPORTED TARGET DESCRIPTOR TYPE CODE */ + }, + [TCM_TOO_MANY_SEGMENT_DESCS] = { + .key = ILLEGAL_REQUEST, + .asc = 0x26, + .ascq = 0x08, /* TOO MANY SEGMENT DESCRIPTORS */ + }, + [TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE] = { + .key = ILLEGAL_REQUEST, + .asc = 0x26, + .ascq = 0x09, /* UNSUPPORTED SEGMENT DESCRIPTOR TYPE CODE */ + }, [TCM_PARAMETER_LIST_LENGTH_ERROR] = { .key = ILLEGAL_REQUEST, .asc = 0x1a, /* PARAMETER LIST LENGTH ERROR */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 29e6858bb164..43edf82e54ff 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -174,6 +174,10 @@ enum tcm_sense_reason_table { TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = R(0x16), TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = R(0x17), TCM_COPY_TARGET_DEVICE_NOT_REACHABLE = R(0x18), + TCM_TOO_MANY_TARGET_DESCS = R(0x19), + TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE = R(0x1a), + TCM_TOO_MANY_SEGMENT_DESCS = R(0x1b), + TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE = R(0x1c), #undef R }; From 61c359194c46cbffec9a1f2c59c1c4011222ad84 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 23 Dec 2016 11:37:54 +0100 Subject: [PATCH 118/258] target: use XCOPY TOO MANY TARGET DESCRIPTORS sense spc4r37 6.4.3.4 states: If the number of CSCD descriptors exceeds the allowed number, the copy manager shall terminate the command with CHECK CONDITION status, with the sense key set to ILLEGAL REQUEST, and the additional sense code set to TOO MANY TARGET DESCRIPTORS. LIO currently responds with INVALID FIELD IN PARAMETER LIST, which sees it fail the libiscsi ExtendedCopy.DescrLimits test. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 37d5caebffa6..db265ad10fa4 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -201,9 +201,11 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, " multiple of %d\n", XCOPY_TARGET_DESC_LEN); return -EINVAL; } - if (tdll > 64) { + if (tdll > RCR_OP_MAX_TARGET_DESC_COUNT * XCOPY_TARGET_DESC_LEN) { pr_err("XCOPY target descriptor supports a maximum" " two src/dest descriptors, tdll: %hu too large..\n", tdll); + /* spc4r37 6.4.3.4 CSCD DESCRIPTOR LIST LENGTH field */ + *sense_ret = TCM_TOO_MANY_TARGET_DESCS; return -EINVAL; } /* From af9f62c1686268c0517b289274d38f3a03bebd2a Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 23 Dec 2016 11:37:55 +0100 Subject: [PATCH 119/258] target: bounds check XCOPY segment descriptor list Check the length of the XCOPY request segment descriptor list against the value advertised via the MAXIMUM SEGMENT DESCRIPTOR COUNT field in the RECEIVE COPY OPERATING PARAMETERS response. spc4r37 6.4.3.5 states: If the number of segment descriptors exceeds the allowed number, the copy manager shall terminate the command with CHECK CONDITION status, with the sense key set to ILLEGAL REQUEST, and the additional sense code set to TOO MANY SEGMENT DESCRIPTORS. This functionality is testable using the libiscsi ExtendedCopy.DescrLimits test. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index db265ad10fa4..da0f2da732e7 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -308,17 +308,26 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd, struct xcopy_op *xop, unsigned char *p, - unsigned int sdll) + unsigned int sdll, sense_reason_t *sense_ret) { unsigned char *desc = p; unsigned int start = 0; int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0; + *sense_ret = TCM_INVALID_PARAMETER_LIST; + if (offset != 0) { pr_err("XCOPY segment descriptor list length is not" " multiple of %d\n", XCOPY_SEGMENT_DESC_LEN); return -EINVAL; } + if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) { + pr_err("XCOPY supports %u segment descriptor(s), sdll: %u too" + " large..\n", RCR_OP_MAX_SG_DESC_COUNT, sdll); + /* spc4r37 6.4.3.5 SEGMENT DESCRIPTOR LIST LENGTH field */ + *sense_ret = TCM_TOO_MANY_SEGMENT_DESCS; + return -EINVAL; + } while (start < sdll) { /* @@ -916,7 +925,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) seg_desc = &p[16]; seg_desc += (rc * XCOPY_TARGET_DESC_LEN); - rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, sdll); + rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, + sdll, &ret); if (rc <= 0) { xcopy_pt_undepend_remotedev(xop); goto out; From 7d38706669ce00603b187f667a4eb67c94eac098 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 23 Dec 2016 11:37:56 +0100 Subject: [PATCH 120/258] target: bounds check XCOPY total descriptor list length spc4r37 6.4.3.5 states: If the combined length of the CSCD descriptors and segment descriptors exceeds the allowed value, then the copy manager shall terminate the command with CHECK CONDITION status, with the sense key set to ILLEGAL REQUEST, and the additional sense code set to PARAMETER LIST LENGTH ERROR. This functionality can be tested using the libiscsi ExtendedCopy.DescrLimits test. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index da0f2da732e7..0d10fcf438d1 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -894,6 +894,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) */ tdll = get_unaligned_be16(&p[2]); sdll = get_unaligned_be32(&p[8]); + if (tdll + sdll > RCR_OP_MAX_DESC_LIST_LEN) { + pr_err("XCOPY descriptor list length %u exceeds maximum %u\n", + tdll + sdll, RCR_OP_MAX_DESC_LIST_LEN); + ret = TCM_PARAMETER_LIST_LENGTH_ERROR; + goto out; + } inline_dl = get_unaligned_be32(&p[12]); if (inline_dl != 0) { From c243849720ac237e9e7191fe57f619bb3a871d4c Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 2 Jan 2017 18:04:04 +0100 Subject: [PATCH 121/258] target: return UNSUPPORTED TARGET/SEGMENT DESC TYPE CODE sense Use UNSUPPORTED TARGET DESCRIPTOR TYPE CODE and UNSUPPORTED SEGMENT DESCRIPTOR TYPE CODE additional sense codes if a descriptor type in an XCOPY request is not supported, as specified in spc4r37 6.4.5 and 6.4.6. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 0d10fcf438d1..52738a108e5f 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -199,6 +199,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, if (offset != 0) { pr_err("XCOPY target descriptor list length is not" " multiple of %d\n", XCOPY_TARGET_DESC_LEN); + *sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE; return -EINVAL; } if (tdll > RCR_OP_MAX_TARGET_DESC_COUNT * XCOPY_TARGET_DESC_LEN) { @@ -240,6 +241,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, default: pr_err("XCOPY unsupported descriptor type code:" " 0x%02x\n", desc[0]); + *sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE; goto out; } } @@ -319,6 +321,7 @@ static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd, if (offset != 0) { pr_err("XCOPY segment descriptor list length is not" " multiple of %d\n", XCOPY_SEGMENT_DESC_LEN); + *sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE; return -EINVAL; } if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) { @@ -346,6 +349,7 @@ static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd, default: pr_err("XCOPY unsupported segment descriptor" "type: 0x%02x\n", desc[0]); + *sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE; goto out; } } From 94aae4caacda89a1bdb7198b260f4ca3595b7ed7 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 2 Jan 2017 18:04:05 +0100 Subject: [PATCH 122/258] target: simplify XCOPY wwn->se_dev lookup helper target_xcopy_locate_se_dev_e4() is used to locate an se_dev, based on the WWN provided with the XCOPY request. Remove a couple of unneeded arguments, and rely on the caller for the src/dst test. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 52738a108e5f..155db18ee4e7 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -53,18 +53,13 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) return 0; } -static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, - bool src) +static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, + struct se_device **found_dev) { struct se_device *se_dev; - unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; + unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; int rc; - if (src) - dev_wwn = &xop->dst_tid_wwn[0]; - else - dev_wwn = &xop->src_tid_wwn[0]; - mutex_lock(&g_device_mutex); list_for_each_entry(se_dev, &g_device_list, g_dev_node) { @@ -78,15 +73,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op if (rc != 0) continue; - if (src) { - xop->dst_dev = se_dev; - pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located" - " se_dev\n", xop->dst_dev); - } else { - xop->src_dev = se_dev; - pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from located" - " se_dev\n", xop->src_dev); - } + *found_dev = se_dev; + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); rc = target_depend_item(&se_dev->dev_group.cg_item); if (rc != 0) { @@ -247,9 +235,11 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, } if (xop->op_origin == XCOL_SOURCE_RECV_OP) - rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true); + rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn, + &xop->dst_dev); else - rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false); + rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn, + &xop->src_dev); /* * If a matching IEEE NAA 0x83 descriptor for the requested device * is not located on this node, return COPY_ABORTED with ASQ/ASQC From f184210bca6c9d0091ff5e5629dea4cbb8a17c0f Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 2 Jan 2017 18:04:06 +0100 Subject: [PATCH 123/258] target: check XCOPY segment descriptor CSCD IDs Ensure that the segment descriptor CSCD descriptor ID values correspond to CSCD descriptor entries located in the XCOPY command parameter list. SPC4r37 6.4.6.1 Table 150 specifies this range as 0000h to 07FFh, where the CSCD descriptor location in the parameter list can be located via: 16 + (id * 32) Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig [ bvanassche: inserted "; " in the format string of an error message and also moved a "||" operator from the start of a line to the end of the previous line ] Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 8 ++++++++ drivers/target/target_core_xcopy.h | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 155db18ee4e7..41a2a8ad1046 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -278,6 +278,14 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op xop->stdi = get_unaligned_be16(&desc[4]); xop->dtdi = get_unaligned_be16(&desc[6]); + + if (xop->stdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX || + xop->dtdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX) { + pr_err("XCOPY segment desc 0x02: unsupported CSCD ID > 0x%x; stdi: %hu dtdi: %hu\n", + XCOPY_CSCD_DESC_ID_LIST_OFF_MAX, xop->stdi, xop->dtdi); + return -EINVAL; + } + pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: %d\n", desc_len, xop->stdi, xop->dtdi, dc); diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index 4d3d4dd060f2..e2d141140342 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -5,6 +5,12 @@ #define XCOPY_NAA_IEEE_REGEX_LEN 16 #define XCOPY_MAX_SECTORS 1024 +/* + * SPC4r37 6.4.6.1 + * Table 150 — CSCD descriptor ID values + */ +#define XCOPY_CSCD_DESC_ID_LIST_OFF_MAX 0x07FF + enum xcopy_origin_list { XCOL_SOURCE_RECV_OP = 0x01, XCOL_DEST_RECV_OP = 0x02, From 66640d35c1e4ef3c96ba5edb3c5e2ff8ab812e7a Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 2 Jan 2017 18:04:07 +0100 Subject: [PATCH 124/258] target: use XCOPY segment descriptor CSCD IDs The XCOPY specification in SPC4r37 states that the XCOPY source and destination device(s) should be derived from the copy source and copy destination (CSCD) descriptor IDs in the XCOPY segment descriptor. The CSCD IDs are generally (for block -> block copies), indexes into the corresponding CSCD descriptor list, e.g. ================================= EXTENDED COPY Header ================================= CSCD Descriptor List - entry 0 + LU ID <--------------<------------------\ - entry 1 | + LU ID <______________<_____________ | ================================= | | Segment Descriptor List | | - segment 0 | | + src CSCD ID = 0 --------->---------+----/ + dest CSCD ID = 1 ___________>______| + len + src lba + dest lba ================================= Currently LIO completely ignores the src and dest CSCD IDs in the Segment Descriptor List, and instead assumes that the first entry in the CSCD list corresponds to the source, and the second to the destination. This commit removes this assumption, by ensuring that the Segment Descriptor List is parsed prior to processing the CSCD Descriptor List. CSCD Descriptor List processing is modified to compare the current list index with the previously obtained src and dest CSCD IDs. Additionally, XCOPY requests where the src and dest CSCD IDs refer to the CSCD Descriptor List entry can now be successfully processed. Fixes: cbf031f ("target: Add support for EXTENDED_COPY copy offload") Link: https://bugzilla.kernel.org/show_bug.cgi?id=191381 Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 79 ++++++++++++++++++------------ 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 41a2a8ad1046..2595c1eb9e91 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -98,7 +98,7 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, } static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, - unsigned char *p, bool src) + unsigned char *p, unsigned short cscd_index) { unsigned char *desc = p; unsigned short ript; @@ -143,7 +143,13 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op return -EINVAL; } - if (src) { + if (cscd_index != xop->stdi && cscd_index != xop->dtdi) { + pr_debug("XCOPY 0xe4: ignoring CSCD entry %d - neither src nor " + "dest\n", cscd_index); + return 0; + } + + if (cscd_index == xop->stdi) { memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); /* * Determine if the source designator matches the local device @@ -155,10 +161,15 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source" " received xop\n", xop->src_dev); } - } else { + } + + if (cscd_index == xop->dtdi) { memcpy(&xop->dst_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); /* - * Determine if the destination designator matches the local device + * Determine if the destination designator matches the local + * device. If @cscd_index corresponds to both source (stdi) and + * destination (dtdi), or dtdi comes after stdi, then + * XCOL_DEST_RECV_OP wins. */ if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0], XCOPY_NAA_IEEE_REGEX_LEN)) { @@ -178,9 +189,9 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, { struct se_device *local_dev = se_cmd->se_dev; unsigned char *desc = p; - int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0; + int offset = tdll % XCOPY_TARGET_DESC_LEN, rc; + unsigned short cscd_index = 0; unsigned short start = 0; - bool src = true; *sense_ret = TCM_INVALID_PARAMETER_LIST; @@ -206,25 +217,19 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, while (start < tdll) { /* - * Check target descriptor identification with 0xE4 type with - * use VPD 0x83 WWPN matching .. + * Check target descriptor identification with 0xE4 type, and + * compare the current index with the CSCD descriptor IDs in + * the segment descriptor. Use VPD 0x83 WWPN matching .. */ switch (desc[0]) { case 0xe4: rc = target_xcopy_parse_tiddesc_e4(se_cmd, xop, - &desc[0], src); + &desc[0], cscd_index); if (rc != 0) goto out; - /* - * Assume target descriptors are in source -> destination order.. - */ - if (src) - src = false; - else - src = true; start += XCOPY_TARGET_DESC_LEN; desc += XCOPY_TARGET_DESC_LEN; - ret++; + cscd_index++; break; default: pr_err("XCOPY unsupported descriptor type code:" @@ -234,12 +239,21 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, } } - if (xop->op_origin == XCOL_SOURCE_RECV_OP) + switch (xop->op_origin) { + case XCOL_SOURCE_RECV_OP: rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn, &xop->dst_dev); - else + break; + case XCOL_DEST_RECV_OP: rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn, &xop->src_dev); + break; + default: + pr_err("XCOPY CSCD descriptor IDs not found in CSCD list - " + "stdi: %hu dtdi: %hu\n", xop->stdi, xop->dtdi); + rc = -EINVAL; + break; + } /* * If a matching IEEE NAA 0x83 descriptor for the requested device * is not located on this node, return COPY_ABORTED with ASQ/ASQC @@ -256,7 +270,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, pr_debug("XCOPY TGT desc: Dest dev: %p NAA IEEE WWN: 0x%16phN\n", xop->dst_dev, &xop->dst_tid_wwn[0]); - return ret; + return cscd_index; out: return -EINVAL; @@ -913,6 +927,20 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, tdll, sdll, inline_dl); + /* + * skip over the target descriptors until segment descriptors + * have been passed - CSCD ids are needed to determine src and dest. + */ + seg_desc = &p[16] + tdll; + + rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, + sdll, &ret); + if (rc <= 0) + goto out; + + pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc, + rc * XCOPY_SEGMENT_DESC_LEN); + rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll, &ret); if (rc <= 0) goto out; @@ -930,19 +958,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc, rc * XCOPY_TARGET_DESC_LEN); - seg_desc = &p[16]; - seg_desc += (rc * XCOPY_TARGET_DESC_LEN); - - rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, - sdll, &ret); - if (rc <= 0) { - xcopy_pt_undepend_remotedev(xop); - goto out; - } transport_kunmap_data_sg(se_cmd); - pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc, - rc * XCOPY_SEGMENT_DESC_LEN); INIT_WORK(&xop->xop_work, target_xcopy_do_work); queue_work(xcopy_wq, &xop->xop_work); return TCM_NO_SENSE; From f94fd098f674b78c29f482da1999d8de0c93c74e Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 2 Jan 2017 18:04:08 +0100 Subject: [PATCH 125/258] target: check for XCOPY parameter truncation Check for XCOPY header, CSCD descriptor and segment descriptor list truncation, and respond accordingly. SPC4r37 6.4.1 EXTENDED COPY(LID4) states (also applying to LID1 reqs): If the parameter list length causes truncation of the parameter list, then the copy manager shall transfer no data and shall terminate the EXTENDED COPY command with CHECK CONDITION status, with the sense key set to ILLEGAL REQUEST, and the additional sense code set to PARAMETER LIST LENGTH ERROR. This behaviour can be tested using the libiscsi ExtendedCopy.ParamHdr test. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 14 ++++++++++++++ drivers/target/target_core_xcopy.h | 1 + 2 files changed, 15 insertions(+) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 2595c1eb9e91..a9a6462c66d1 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -888,6 +888,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) return TCM_UNSUPPORTED_SCSI_OPCODE; } + if (se_cmd->data_length < XCOPY_HDR_LEN) { + pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n", + se_cmd->data_length, XCOPY_HDR_LEN); + return TCM_PARAMETER_LIST_LENGTH_ERROR; + } + xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); if (!xop) { pr_err("Unable to allocate xcopy_op\n"); @@ -923,6 +929,14 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) goto out; } + if (se_cmd->data_length < (XCOPY_HDR_LEN + tdll + sdll + inline_dl)) { + pr_err("XCOPY parameter truncation: data length %u too small " + "for tdll: %hu sdll: %u inline_dl: %u\n", + se_cmd->data_length, tdll, sdll, inline_dl); + ret = TCM_PARAMETER_LIST_LENGTH_ERROR; + goto out; + } + pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x" " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, tdll, sdll, inline_dl); diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index e2d141140342..7c0b105cbe1b 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -1,5 +1,6 @@ #include +#define XCOPY_HDR_LEN 16 #define XCOPY_TARGET_DESC_LEN 32 #define XCOPY_SEGMENT_DESC_LEN 28 #define XCOPY_NAA_IEEE_REGEX_LEN 16 From 87156518da94a696f2b27ab8945d531af2f1d339 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 2 Jan 2017 18:04:09 +0100 Subject: [PATCH 126/258] target: support XCOPY requests without parameters SPC4r37 6.4.1 EXTENDED COPY(LID4) states (also applying to LID1 reqs): A parameter list length of zero specifies that the copy manager shall not transfer any data or alter any internal state, and this shall not be considered an error. This behaviour can be tested using the libiscsi ExtendedCopy.ParamHdr test. Signed-off-by: David Disseldorp Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- drivers/target/target_core_xcopy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index a9a6462c66d1..d828b3b5000b 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -888,6 +888,10 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) return TCM_UNSUPPORTED_SCSI_OPCODE; } + if (se_cmd->data_length == 0) { + target_complete_cmd(se_cmd, SAM_STAT_GOOD); + return TCM_NO_SENSE; + } if (se_cmd->data_length < XCOPY_HDR_LEN) { pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n", se_cmd->data_length, XCOPY_HDR_LEN); From 7b6c1b4c0e1e44544aa18161dba6a741c080a7ef Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 10 Jan 2017 10:46:00 -0600 Subject: [PATCH 127/258] usb: musb: fix runtime PM in debugfs MUSB driver now has runtime PM support, but the debugfs driver misses the PM _get/_put() calls, which could cause MUSB register access failure. Cc: stable@vger.kernel.org # 4.9+ Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_debugfs.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c index 4fef50e5c8c1..dd70c88419d2 100644 --- a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -114,6 +114,7 @@ static int musb_regdump_show(struct seq_file *s, void *unused) unsigned i; seq_printf(s, "MUSB (M)HDRC Register Dump\n"); + pm_runtime_get_sync(musb->controller); for (i = 0; i < ARRAY_SIZE(musb_regmap); i++) { switch (musb_regmap[i].size) { @@ -132,6 +133,8 @@ static int musb_regdump_show(struct seq_file *s, void *unused) } } + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return 0; } @@ -145,7 +148,10 @@ static int musb_test_mode_show(struct seq_file *s, void *unused) struct musb *musb = s->private; unsigned test; + pm_runtime_get_sync(musb->controller); test = musb_readb(musb->mregs, MUSB_TESTMODE); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); if (test & MUSB_TEST_FORCE_HOST) seq_printf(s, "force host\n"); @@ -194,11 +200,12 @@ static ssize_t musb_test_mode_write(struct file *file, u8 test; char buf[18]; + pm_runtime_get_sync(musb->controller); test = musb_readb(musb->mregs, MUSB_TESTMODE); if (test) { dev_err(musb->controller, "Error: test mode is already set. " "Please do USB Bus Reset to start a new test.\n"); - return count; + goto ret; } memset(buf, 0x00, sizeof(buf)); @@ -234,6 +241,9 @@ static ssize_t musb_test_mode_write(struct file *file, musb_writeb(musb->mregs, MUSB_TESTMODE, test); +ret: + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return count; } @@ -254,8 +264,13 @@ static int musb_softconnect_show(struct seq_file *s, void *unused) switch (musb->xceiv->otg->state) { case OTG_STATE_A_HOST: case OTG_STATE_A_WAIT_BCON: + pm_runtime_get_sync(musb->controller); + reg = musb_readb(musb->mregs, MUSB_DEVCTL); connect = reg & MUSB_DEVCTL_SESSION ? 1 : 0; + + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); break; default: connect = -1; @@ -284,6 +299,7 @@ static ssize_t musb_softconnect_write(struct file *file, if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; + pm_runtime_get_sync(musb->controller); if (!strncmp(buf, "0", 1)) { switch (musb->xceiv->otg->state) { case OTG_STATE_A_HOST: @@ -314,6 +330,8 @@ static ssize_t musb_softconnect_write(struct file *file, } } + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return count; } From 7c9d8d0c41b3e24473ac7648a7fc2d644ccf08ff Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Mon, 9 Jan 2017 10:21:20 -0600 Subject: [PATCH 128/258] ibmvscsis: Fix srp_transfer_data fail return code If srp_transfer_data fails within ibmvscsis_write_pending, then the most likely scenario is that the client timed out the op and removed the TCE mapping. Thus it will loop forever retrying the op that is pretty much guaranteed to fail forever. A better return code would be EIO instead of EAGAIN. Cc: stable@vger.kernel.org Reported-by: Steven Royer Tested-by: Steven Royer Signed-off-by: Bryant G. Ly Signed-off-by: Bart Van Assche --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 3d3768aaab4f..8fb5c54c7dd3 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3585,7 +3585,7 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd) 1, 1); if (rc) { pr_err("srp_transfer_data() failed: %d\n", rc); - return -EAGAIN; + return -EIO; } /* * We now tell TCM to add this WRITE CDB directly into the TCM storage From dd545b52a3e1efd9f2c6352dbe95ccd0c53461cc Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Tue, 10 Jan 2017 13:29:54 -0700 Subject: [PATCH 129/258] do_direct_IO: Use inode->i_blkbits to compute block count to be cleaned The code currently uses sdio->blkbits to compute the number of blocks to be cleaned. However sdio->blkbits is derived from the logical block size of the underlying block device (Refer to the definition of do_blockdev_direct_IO()). Due to this, generic/299 test would rarely fail when executed on an ext4 filesystem with 64k as the block size and when using a virtio based disk (having 512 byte as the logical block size) inside a kvm guest. This commit fixes the bug by using inode->i_blkbits to compute the number of blocks to be cleaned. Signed-off-by: Chandan Rajendra Reviewed-by: Christoph Hellwig Fixed up by Jeff Moyer to only use/evaluate inode->i_blkbits once, to avoid issues with block size changes with IO in flight. Signed-off-by: Jens Axboe --- fs/direct-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index aeae8c063451..c87bae4376b8 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -906,6 +906,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, struct buffer_head *map_bh) { const unsigned blkbits = sdio->blkbits; + const unsigned i_blkbits = blkbits + sdio->blkfactor; int ret = 0; while (sdio->block_in_file < sdio->final_block_in_request) { @@ -949,7 +950,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, clean_bdev_aliases( map_bh->b_bdev, map_bh->b_blocknr, - map_bh->b_size >> blkbits); + map_bh->b_size >> i_blkbits); } if (!sdio->blkfactor) From a14d749fcebe97ddf6af6db3d1f6ece85c9ddcb9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 08:56:23 -0700 Subject: [PATCH 130/258] virtio_blk: avoid DMA to stack for the sense buffer Most users of BLOCK_PC requests allocate the sense buffer on the stack, so to avoid DMA to the stack copy them to a field in the heap allocated virtblk_req structure. Without that any attempt at SCSI passthrough I/O, including the SG_IO ioctl from userspace will crash the kernel. Note that this includes running tools like hdparm even when the host does not have SCSI passthrough enabled. Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5545a679abd8..3c3b8f601469 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -56,6 +56,7 @@ struct virtblk_req { struct virtio_blk_outhdr out_hdr; struct virtio_scsi_inhdr in_hdr; u8 status; + u8 sense[SCSI_SENSE_BUFFERSIZE]; struct scatterlist sg[]; }; @@ -102,7 +103,8 @@ static int __virtblk_add_req(struct virtqueue *vq, } if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { - sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); + memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); + sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); sgs[num_out + num_in++] = &sense; sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); sgs[num_out + num_in++] = &inhdr; From 25b4acfc7de0fc4da3bfea3a316f7282c6fbde81 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 9 Jan 2017 15:20:31 -0500 Subject: [PATCH 131/258] nbd: blk_mq_init_queue returns an error code on failure, not NULL Additionally, don't assign directly to disk->queue, otherwise blk_put_queue (called via put_disk) will choke (panic) on the errno stored there. Bug found by code inspection after Omar found a similar issue in virtio_blk. Compile-tested only. Signed-off-by: Jeff Moyer Reviewed-by: Omar Sandoval Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 38c576f76d36..50a2020b5b72 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1042,6 +1042,7 @@ static int __init nbd_init(void) return -ENOMEM; for (i = 0; i < nbds_max; i++) { + struct request_queue *q; struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) goto out; @@ -1067,12 +1068,13 @@ static int __init nbd_init(void) * every gendisk to have its very own request_queue struct. * These structs are big so we dynamically allocate them. */ - disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set); - if (!disk->queue) { + q = blk_mq_init_queue(&nbd_dev[i].tag_set); + if (IS_ERR(q)) { blk_mq_free_tag_set(&nbd_dev[i].tag_set); put_disk(disk); goto out; } + disk->queue = q; /* * Tell the block layer that we are not a rotational device From 6bf6b0aa3da84a3d9126919a94c49c0fb7ee2fb3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 9 Jan 2017 11:44:12 -0800 Subject: [PATCH 132/258] virtio_blk: fix panic in initialization error path If blk_mq_init_queue() returns an error, it gets assigned to vblk->disk->queue. Then, when we call put_disk(), we end up calling blk_put_queue() with the ERR_PTR, causing a bad dereference. Fix it by only assigning to vblk->disk->queue on success. Signed-off-by: Omar Sandoval Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3c3b8f601469..10332c24f961 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -630,11 +630,12 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_put_disk; - q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set); + q = blk_mq_init_queue(&vblk->tag_set); if (IS_ERR(q)) { err = -ENOMEM; goto out_free_tags; } + vblk->disk->queue = q; q->queuedata = vblk; From 7ee7f45a763bd68c3a606595a8c1bb08c3e6146b Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 11 Jan 2017 01:27:21 +0200 Subject: [PATCH 133/258] mei: bus: enable OS version only for SPT and newer Sending OS version for support of TPM2_ChangeEPS() is required only for SPT FW (HMB version 2.0) and newer. On older platforms the command should be just ignored by the firmware but some older platforms misbehave so it's safer to send the command only if required. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=192051 Fixes: 7279b238bade (mei: send OS type to the FW) Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Tested-by: Jan Niehusmann Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 3 +++ drivers/misc/mei/debugfs.c | 2 ++ drivers/misc/mei/hbm.c | 4 ++++ drivers/misc/mei/hw.h | 6 ++++++ drivers/misc/mei/mei_dev.h | 2 ++ 5 files changed, 17 insertions(+) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 18e05ca7584f..3600c9993a98 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -152,6 +152,9 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev) { int ret; + if (!cldev->bus->hbm_f_os_supported) + return; + ret = mei_cldev_enable(cldev); if (ret) return; diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c index c6c051b52f55..c6217a4993ad 100644 --- a/drivers/misc/mei/debugfs.c +++ b/drivers/misc/mei/debugfs.c @@ -180,6 +180,8 @@ static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf, dev->hbm_f_ev_supported); pos += scnprintf(buf + pos, bufsz - pos, "\tFA: %01d\n", dev->hbm_f_fa_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tOS: %01d\n", + dev->hbm_f_os_supported); } pos += scnprintf(buf + pos, bufsz - pos, "pg: %s, %s\n", diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index dd7f15a65eed..25b4a1ba522d 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -989,6 +989,10 @@ static void mei_hbm_config_features(struct mei_device *dev) /* Fixed Address Client Support */ if (dev->version.major_version >= HBM_MAJOR_VERSION_FA) dev->hbm_f_fa_supported = 1; + + /* OS ver message Support */ + if (dev->version.major_version >= HBM_MAJOR_VERSION_OS) + dev->hbm_f_os_supported = 1; } /** diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h index 9daf3f9aed25..e1e4d47d4d7d 100644 --- a/drivers/misc/mei/hw.h +++ b/drivers/misc/mei/hw.h @@ -76,6 +76,12 @@ #define HBM_MINOR_VERSION_FA 0 #define HBM_MAJOR_VERSION_FA 2 +/* + * MEI version with OS ver message support + */ +#define HBM_MINOR_VERSION_OS 0 +#define HBM_MAJOR_VERSION_OS 2 + /* Host bus message command opcode */ #define MEI_HBM_CMD_OP_MSK 0x7f /* Host bus message command RESPONSE */ diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 699693cd8c59..8dadb98662a9 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -406,6 +406,7 @@ const char *mei_pg_state_str(enum mei_pg_state state); * @hbm_f_ev_supported : hbm feature event notification * @hbm_f_fa_supported : hbm feature fixed address client * @hbm_f_ie_supported : hbm feature immediate reply to enum request + * @hbm_f_os_supported : hbm feature support OS ver message * * @me_clients_rwsem: rw lock over me_clients list * @me_clients : list of FW clients @@ -487,6 +488,7 @@ struct mei_device { unsigned int hbm_f_ev_supported:1; unsigned int hbm_f_fa_supported:1; unsigned int hbm_f_ie_supported:1; + unsigned int hbm_f_os_supported:1; struct rw_semaphore me_clients_rwsem; struct list_head me_clients; From 488debb9971bc7d0edd6d8080ba78ca02a04f6c4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Jan 2017 17:15:01 +0000 Subject: [PATCH 134/258] drivers: char: mem: Fix thinkos in kmem address checks When borrowing the pfn_valid() check from mmap_kmem(), somebody managed to get physical and virtual addresses spectacularly muddled up, such that we've ended up with checks for one being the other. Whilst this does indeed prevent out-of-bounds accesses crashing, on most systems it also prevents the more desirable use-case of working at all ever. Check the *virtual* offset correctly for what it is. Furthermore, do so in the right place - a read or write may span multiple pages, so a single up-front check is insufficient. High memory accesses already have a similar validity check just before the copy_to_user() call, so just make the low memory path fully consistent with that. Reported-by: Jason A. Donenfeld CC: stable@vger.kernel.org Fixes: 148a1bc84398 ("drivers: char: mem: Check {read,write}_kmem() addresses") Signed-off-by: Robin Murphy Signed-off-by: Greg Kroah-Hartman --- drivers/char/mem.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 5bb1985ec484..6d9cc2d39d22 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -381,9 +381,6 @@ static ssize_t read_kmem(struct file *file, char __user *buf, char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ int err = 0; - if (!pfn_valid(PFN_DOWN(p))) - return -EIO; - read = 0; if (p < (unsigned long) high_memory) { low_count = count; @@ -412,6 +409,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf, * by the kernel or data corruption may occur */ kbuf = xlate_dev_kmem_ptr((void *)p); + if (!virt_addr_valid(kbuf)) + return -ENXIO; if (copy_to_user(buf, kbuf, sz)) return -EFAULT; @@ -482,6 +481,8 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf, * corruption may occur. */ ptr = xlate_dev_kmem_ptr((void *)p); + if (!virt_addr_valid(ptr)) + return -ENXIO; copied = copy_from_user(ptr, buf, sz); if (copied) { @@ -512,9 +513,6 @@ static ssize_t write_kmem(struct file *file, const char __user *buf, char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ int err = 0; - if (!pfn_valid(PFN_DOWN(p))) - return -EIO; - if (p < (unsigned long) high_memory) { unsigned long to_write = min_t(unsigned long, count, (unsigned long)high_memory - p); From 89d8232411a85b9a6b12fd5da4d07d8a138a8e0c Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 13 Dec 2016 17:27:56 +0100 Subject: [PATCH 135/258] tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx If we don't disable the transmitter in atmel_stop_tx, the DMA buffer continues to send data until it is emptied. This cause problems with the flow control (CTS is asserted and data are still sent). So, disabling the transmitter in atmel_stop_tx is a sane thing to do. Tested on at91sam9g35-cm(DMA) Tested for regressions on sama5d2-xplained(Fifo) and at91sam9g20ek(PDC) Cc: (beware, this won't apply before 4.3) Signed-off-by: Richard Genoud Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 168b10cad47b..f9d42de5ab2d 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -481,6 +481,14 @@ static void atmel_stop_tx(struct uart_port *port) /* disable PDC transmit */ atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); } + + /* + * Disable the transmitter. + * This is mandatory when DMA is used, otherwise the DMA buffer + * is fully transmitted. + */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS); + /* Disable interrupts */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); @@ -513,6 +521,9 @@ static void atmel_start_tx(struct uart_port *port) /* Enable interrupts */ atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask); + + /* re-enable the transmitter */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); } /* From b389f173aaa1204d6dc1f299082a162eb0491545 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 6 Dec 2016 13:05:33 +0100 Subject: [PATCH 136/258] tty/serial: atmel: RS485 half duplex w/DMA: enable RX after TX is done When using RS485 in half duplex, RX should be enabled when TX is finished, and stopped when TX starts. Before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half duplex with DMA"), RX was not disabled in atmel_start_tx() if the DMA was used. So, collisions could happened. But disabling RX in atmel_start_tx() uncovered another bug: RX was enabled again in the wrong place (in atmel_tx_dma) instead of being enabled when TX is finished (in atmel_complete_tx_dma), so the transmission simply stopped. This bug was not triggered before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half duplex with DMA") because RX was never disabled before. Moving atmel_start_rx() in atmel_complete_tx_dma() corrects the problem. Cc: stable@vger.kernel.org Reported-by: Gil Weber Fixes: 0058f0871efe7b01c6 Tested-by: Gil Weber Signed-off-by: Richard Genoud Acked-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index f9d42de5ab2d..fabbe76203bb 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -809,6 +809,11 @@ static void atmel_complete_tx_dma(void *arg) */ if (!uart_circ_empty(xmit)) atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx); + else if ((port->rs485.flags & SER_RS485_ENABLED) && + !(port->rs485.flags & SER_RS485_RX_DURING_TX)) { + /* DMA done, stop TX, start RX for RS485 */ + atmel_start_rx(port); + } spin_unlock_irqrestore(&port->lock, flags); } @@ -911,12 +916,6 @@ static void atmel_tx_dma(struct uart_port *port) desc->callback = atmel_complete_tx_dma; desc->callback_param = atmel_port; atmel_port->cookie_tx = dmaengine_submit(desc); - - } else { - if (port->rs485.flags & SER_RS485_ENABLED) { - /* DMA done, stop TX, start RX for RS485 */ - atmel_start_rx(port); - } } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) From c130b666a9a711f985a0a44b58699ebe14bb7245 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 28 Dec 2016 16:42:00 -0200 Subject: [PATCH 137/258] 8250_pci: Fix potential use-after-free in error path Commit f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during PCI error recovery") introduces a potential use-after-free in case the pciserial_init_ports call in serial8250_io_resume fails, which may happen if a memory allocation fails or if the .init quirk failed for whatever reason). If this happen, further pci_get_drvdata will return a pointer to freed memory. This patch reworks the PCI recovery resume hook to restore the old priv structure in this case, which should be ok, since the ports were already detached. Such error during recovery causes us to give up on the recovery. Fixes: f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during PCI error recovery") Reported-by: Michal Suchanek Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Guilherme G. Piccoli Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index aa0166b6d450..116436b7fa52 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5642,17 +5642,15 @@ static pci_ers_result_t serial8250_io_slot_reset(struct pci_dev *dev) static void serial8250_io_resume(struct pci_dev *dev) { struct serial_private *priv = pci_get_drvdata(dev); - const struct pciserial_board *board; + struct serial_private *new; if (!priv) return; - board = priv->board; - kfree(priv); - priv = pciserial_init_ports(dev, board); - - if (!IS_ERR(priv)) { - pci_set_drvdata(dev, priv); + new = pciserial_init_ports(dev, priv->board); + if (!IS_ERR(new)) { + pci_set_drvdata(dev, new); + kfree(priv); } } From 2bed8a8e70729f996af92042d3ad0f11870acc1f Mon Sep 17 00:00:00 2001 From: Daniel Jedrychowski Date: Mon, 12 Dec 2016 09:18:28 +1100 Subject: [PATCH 138/258] Clearing FIFOs in RS485 emulation mode causes subsequent transmits to break When in RS485 emulation mode, __do_stop_tx_rs485() calls serial8250_clear_fifos(). This not only clears the FIFOs, but also sets all bits in their control register (UART_FCR) to 0. One of the effects of this is the disabling of the FIFOs, which turns them into single-byte holding registers. The rest of the driver doesn't know this, which results in the lions share of characters passed into a write call to be dropped. (I can supply logic analyzer screenshots if necessary) This fix replaces the serial8250_clear_fifos() call to serial8250_clear_and_reinit_fifos() - this prevents the "dropped characters" issue from manifesting again while retaining the requirement of clearing the RX FIFO after transmission if the SER_RS485_RX_DURING_TX flag is disabled. Signed-off-by: Daniel Jedrychowski Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index fe4399b41df6..c13fec451d03 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1413,7 +1413,7 @@ static void __do_stop_tx_rs485(struct uart_8250_port *p) * Enable previously disabled RX interrupts. */ if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) { - serial8250_clear_fifos(p); + serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; serial_port_out(&p->port, UART_IER, p->ier); From 6741f551a0b26479de2532ffa43a366747e6dbf3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 11 Dec 2016 10:05:49 +0800 Subject: [PATCH 139/258] Revert "tty: serial: 8250: add CON_CONSDEV to flags" This commit needs to be reverted because it prevents people from using the serial console as a secondary console with input being directed to tty0. IOW, if you boot with console=ttyS0 console=tty0 then all kernels prior to this commit will produce output on both ttyS0 and tty0 but input will only be taken from tty0. With this patch the serial console will always be the primary console instead of tty0, potentially preventing people from getting into their machines in emergency situations. Fixes: d03516df8375 ("tty: serial: 8250: add CON_CONSDEV to flags") Signed-off-by: Herbert Xu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 61569a765d9e..76e03a7de9cc 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -675,7 +675,7 @@ static struct console univ8250_console = { .device = uart_console_device, .setup = univ8250_console_setup, .match = univ8250_console_match, - .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_CONSDEV, + .flags = CON_PRINTBUFFER | CON_ANYTIME, .index = -1, .data = &serial8250_reg, }; From 5b11ebedd6a8bb4271b796e498cd15c0fe1133b6 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 16:56:49 +0800 Subject: [PATCH 140/258] extcon: return error code on failure Function get_zeroed_page() returns a NULL pointer if there is no enough memory. In function extcon_sync(), it returns 0 if the call to get_zeroed_page() fails. The return value 0 indicates success in the context, which is incosistent with the execution status. This patch fixes the bug by returning -ENOMEM. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188611 Signed-off-by: Pan Bian Fixes: a580982f0836e Cc: stable Acked-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 78298460d168..7c1e3a7b14e0 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -453,7 +453,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id) dev_err(&edev->dev, "out of memory in extcon_set_state\n"); kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE); - return 0; + return -ENOMEM; } length = name_show(&edev->dev, NULL, prop_buf); From 0fa2c8eb270413160557babda519aa3c21e2bfaf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Dec 2016 16:23:55 +0000 Subject: [PATCH 141/258] ppdev: don't print a free'd string A previous fix of a memory leak now prints the string 'name' that was previously free'd. Fix this by free'ing the string at the end of the function and adding an error exit path for the error conditions. CoverityScan CID#1384523 ("Use after free") Fixes: 2bd362d5f45c1 ("ppdev: fix memory leak") Signed-off-by: Colin Ian King Acked-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/char/ppdev.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 02819e0703c8..87885d146dbb 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -290,6 +290,7 @@ static int register_device(int minor, struct pp_struct *pp) struct pardevice *pdev = NULL; char *name; struct pardev_cb ppdev_cb; + int rc = 0; name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor); if (name == NULL) @@ -298,8 +299,8 @@ static int register_device(int minor, struct pp_struct *pp) port = parport_find_number(minor); if (!port) { pr_warn("%s: no associated port!\n", name); - kfree(name); - return -ENXIO; + rc = -ENXIO; + goto err; } memset(&ppdev_cb, 0, sizeof(ppdev_cb)); @@ -308,16 +309,18 @@ static int register_device(int minor, struct pp_struct *pp) ppdev_cb.private = pp; pdev = parport_register_dev_model(port, name, &ppdev_cb, minor); parport_put_port(port); - kfree(name); if (!pdev) { pr_warn("%s: failed to register device!\n", name); - return -ENXIO; + rc = -ENXIO; + goto err; } pp->pdev = pdev; dev_dbg(&pdev->dev, "registered pardevice\n"); - return 0; +err: + kfree(name); + return rc; } static enum ieee1284_phase init_phase(int mode) From 802c03881f29844af0252b6e22be5d2f65f93fd0 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 6 Jan 2017 02:14:16 +0900 Subject: [PATCH 142/258] sysrq: attach sysrq handler correctly for 32-bit kernel The sysrq input handler should be attached to the input device which has a left alt key. On 32-bit kernels, some input devices which has a left alt key cannot attach sysrq handler. Because the keybit bitmap in struct input_device_id for sysrq is not correctly initialized. KEY_LEFTALT is 56 which is greater than BITS_PER_LONG on 32-bit kernels. I found this problem when using a matrix keypad device which defines a KEY_LEFTALT (56) but doesn't have a KEY_O (24 == 56%32). Cc: Jiri Slaby Signed-off-by: Akinobu Mita Acked-by: Dmitry Torokhov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 52bbd27e93ae..701c085bb19b 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -946,8 +946,8 @@ static const struct input_device_id sysrq_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { BIT_MASK(KEY_LEFTALT) }, + .evbit = { [BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_LEFTALT)] = BIT_MASK(KEY_LEFTALT) }, }, { }, }; From 546cf3ef9c92b76ff0037c871b939e63caea98b3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 26 Dec 2016 09:58:34 -0800 Subject: [PATCH 143/258] auxdisplay: fix new ht16k33 build errors Fix build errors caused by selecting incorrect kconfig symbols. drivers/built-in.o:(.data+0x19cec): undefined reference to `sys_fillrect' drivers/built-in.o:(.data+0x19cf0): undefined reference to `sys_copyarea' drivers/built-in.o:(.data+0x19cf4): undefined reference to `sys_imageblit' Fixes: 31114fa95bdb (auxdisplay: ht16k33: select framebuffer helper modules) Signed-off-by: Randy Dunlap Cc: Miguel Ojeda Sandonis Reported-by: kbuild test robot Acked-by: Robin van der Gracht Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 4ef4c5caed4f..8a8e403644d6 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -132,9 +132,9 @@ config HT16K33 tristate "Holtek Ht16K33 LED controller with keyscan" depends on FB && OF && I2C && INPUT select FB_SYS_FOPS - select FB_CFB_FILLRECT - select FB_CFB_COPYAREA - select FB_CFB_IMAGEBLIT + select FB_SYS_FILLRECT + select FB_SYS_COPYAREA + select FB_SYS_IMAGEBLIT select INPUT_MATRIXKMAP select FB_BACKLIGHT help From 24b91e360ef521a2808771633d76ebc68bd5604b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Jan 2017 15:12:04 +0100 Subject: [PATCH 144/258] nohz: Fix collision between tick and other hrtimers When the tick is stopped and an interrupt occurs afterward, we check on that interrupt exit if the next tick needs to be rescheduled. If it doesn't need any update, we don't want to do anything. In order to check if the tick needs an update, we compare it against the clockevent device deadline. Now that's a problem because the clockevent device is at a lower level than the tick itself if it is implemented on top of hrtimer. Every hrtimer share this clockevent device. So comparing the next tick deadline against the clockevent device deadline is wrong because the device may be programmed for another hrtimer whose deadline collides with the tick. As a result we may end up not reprogramming the tick accidentally. In a worst case scenario under full dynticks mode, the tick stops firing as it is supposed to every 1hz, leaving /proc/stat stalled: Task in a full dynticks CPU ---------------------------- * hrtimer A is queued 2 seconds ahead * the tick is stopped, scheduled 1 second ahead * tick fires 1 second later * on tick exit, nohz schedules the tick 1 second ahead but sees the clockevent device is already programmed to that deadline, fooled by hrtimer A, the tick isn't rescheduled. * hrtimer A is cancelled before its deadline * tick never fires again until an interrupt happens... In order to fix this, store the next tick deadline to the tick_sched local structure and reuse that value later to check whether we need to reprogram the clock after an interrupt. On the other hand, ts->sleep_length still wants to know about the next clock event and not just the tick, so we want to improve the related comment to avoid confusion. Reported-by: James Hartsock Signed-off-by: Frederic Weisbecker Reviewed-by: Wanpeng Li Acked-by: Peter Zijlstra Acked-by: Rik van Riel Link: http://lkml.kernel.org/r/1483539124-5693-1-git-send-email-fweisbec@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 9 +++++++-- kernel/time/tick-sched.h | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2c115fdab397..74e0388cc88d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -767,7 +767,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, tick = expires; /* Skip reprogram of event if its not changed */ - if (ts->tick_stopped && (expires == dev->next_event)) + if (ts->tick_stopped && (expires == ts->next_tick)) goto out; /* @@ -787,6 +787,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, trace_tick_stop(1, TICK_DEP_MASK_NONE); } + ts->next_tick = tick; + /* * If the expiration time == KTIME_MAX, then we simply stop * the tick timer. @@ -802,7 +804,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, else tick_program_event(tick, 1); out: - /* Update the estimated sleep length */ + /* + * Update the estimated sleep length until the next timer + * (not only the tick). + */ ts->sleep_length = ktime_sub(dev->next_event, now); return tick; } diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index bf38226e5c17..075444e3d48e 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -27,6 +27,7 @@ enum tick_nohz_mode { * timer is modified for nohz sleeps. This is necessary * to resume the tick timer operation in the timeline * when the CPU returns from nohz sleep. + * @next_tick: Next tick to be fired when in dynticks mode. * @tick_stopped: Indicator that the idle tick has been stopped * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_calls: Total number of idle calls @@ -44,6 +45,7 @@ struct tick_sched { unsigned long check_clocks; enum tick_nohz_mode nohz_mode; ktime_t last_tick; + ktime_t next_tick; int inidle; int tick_stopped; unsigned long idle_jiffies; From c8a6a09c1c617402cc9254b2bc8da359a0347d75 Mon Sep 17 00:00:00 2001 From: Augusto Mecking Caringi Date: Tue, 10 Jan 2017 10:45:00 +0000 Subject: [PATCH 145/258] vme: Fix wrong pointer utilization in ca91cx42_slave_get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In ca91cx42_slave_get function, the value pointed by vme_base pointer is set through: *vme_base = ioread32(bridge->base + CA91CX42_VSI_BS[i]); So it must be dereferenced to be used in calculation of pci_base: *pci_base = (dma_addr_t)*vme_base + pci_offset; This bug was caught thanks to the following gcc warning: drivers/vme/bridges/vme_ca91cx42.c: In function ‘ca91cx42_slave_get’: drivers/vme/bridges/vme_ca91cx42.c:467:14: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] *pci_base = (dma_addr_t)vme_base + pci_offset; Signed-off-by: Augusto Mecking Caringi Acked-By: Martyn Welch Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/vme/bridges/vme_ca91cx42.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c index 6b5ee896af63..7cc51223db1c 100644 --- a/drivers/vme/bridges/vme_ca91cx42.c +++ b/drivers/vme/bridges/vme_ca91cx42.c @@ -464,7 +464,7 @@ static int ca91cx42_slave_get(struct vme_slave_resource *image, int *enabled, vme_bound = ioread32(bridge->base + CA91CX42_VSI_BD[i]); pci_offset = ioread32(bridge->base + CA91CX42_VSI_TO[i]); - *pci_base = (dma_addr_t)vme_base + pci_offset; + *pci_base = (dma_addr_t)*vme_base + pci_offset; *size = (unsigned long long)((vme_bound - *vme_base) + granularity); *enabled = 0; From 69d012345a1a32d3f03957f14d972efccc106a98 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 11 Jan 2017 14:02:00 +0800 Subject: [PATCH 146/258] arm64: hugetlb: fix the wrong return value for huge_ptep_set_access_flags In current code, the @changed always returns the last one's status for the huge page with the contiguous bit set. This is really not what we want. Even one of the PTEs is changed, we should tell it to the caller. This patch fixes this issue. Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit") Cc: # 4.5.x- Signed-off-by: Huang Shijie Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 964b7549af5c..e25584d72396 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -239,7 +239,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) { - changed = ptep_set_access_flags(vma, addr, cpte, + changed |= ptep_set_access_flags(vma, addr, cpte, pfn_pte(pfn, hugeprot), dirty); From 2d5a9c72d0c4ac73cf97f4b7814ed6c44b1e49ae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:18 +0100 Subject: [PATCH 147/258] USB: serial: ch341: fix control-message error handling A short control transfer would currently fail to be detected, something which could lead to stale buffer data being used as valid input. Check for short transfers, and make sure to log any transfer errors. Note that this also avoids leaking heap data to user space (TIOCMGET) and the remote device (break control). Fixes: 6ce76104781a ("USB: Driver for CH341 USB-serial adaptor") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 8d7b0847109b..95aa5233726c 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -113,6 +113,8 @@ static int ch341_control_out(struct usb_device *dev, u8 request, r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, index, NULL, 0, DEFAULT_TIMEOUT); + if (r < 0) + dev_err(&dev->dev, "failed to send control message: %d\n", r); return r; } @@ -130,7 +132,20 @@ static int ch341_control_in(struct usb_device *dev, r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, buf, bufsize, DEFAULT_TIMEOUT); - return r; + if (r < bufsize) { + if (r >= 0) { + dev_err(&dev->dev, + "short control message received (%d < %u)\n", + r, bufsize); + r = -EIO; + } + + dev_err(&dev->dev, "failed to receive control message: %d\n", + r); + return r; + } + + return 0; } static int ch341_set_baudrate_lcr(struct usb_device *dev, @@ -181,9 +196,9 @@ static int ch341_set_handshake(struct usb_device *dev, u8 control) static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; unsigned long flags; buffer = kmalloc(size, GFP_KERNEL); @@ -194,14 +209,9 @@ static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) if (r < 0) goto out; - /* setup the private status if available */ - if (r == 2) { - r = 0; - spin_lock_irqsave(&priv->lock, flags); - priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; - spin_unlock_irqrestore(&priv->lock, flags); - } else - r = -EPROTO; + spin_lock_irqsave(&priv->lock, flags); + priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; + spin_unlock_irqrestore(&priv->lock, flags); out: kfree(buffer); return r; @@ -211,9 +221,9 @@ out: kfree(buffer); static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; buffer = kmalloc(size, GFP_KERNEL); if (!buffer) From 6d6daa20945f3f598e56e18d1f926c08754f5801 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 5 Jan 2017 10:09:25 -0500 Subject: [PATCH 148/258] perf/x86/intel/uncore: Fix hardcoded socket 0 assumption in the Haswell init code hswep_uncore_cpu_init() uses a hardcoded physical package id 0 for the boot cpu. This works as long as the boot CPU is actually on the physical package 0, which is normaly the case after power on / reboot. But it fails with a NULL pointer dereference when a kdump kernel is started on a secondary socket which has a different physical package id because the locigal package translation for physical package 0 does not exist. Use the logical package id of the boot cpu instead of hard coded 0. [ tglx: Rewrote changelog once more ] Fixes: cf6d445f6897 ("perf/x86/uncore: Track packages, not per CPU data") Signed-off-by: Prarit Bhargava Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Harish Chegondi Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1483628965-2890-1-git-send-email-prarit@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/uncore_snbep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index e6832be714bc..dae2fedc1601 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { void hswep_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(0); + int pkg = boot_cpu_data.logical_proc_id; if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; From c38c39bf7cc04d688291f382469e84ec2a8548a4 Mon Sep 17 00:00:00 2001 From: Cedric Izoard Date: Wed, 11 Jan 2017 14:39:07 +0000 Subject: [PATCH 149/258] mac80211: Fix headroom allocation when forwarding mesh pkt This patch fix issue introduced by my previous commit that tried to ensure enough headroom was present, and instead broke it. When forwarding mesh pkt, mac80211 may also add security header, and it must therefore be taken into account in the needed headroom. Fixes: d8da0b5d64d5 ("mac80211: Ensure enough headroom when forwarding mesh pkt") Signed-off-by: Cedric Izoard Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c037c5bb6167..c87e61358b77 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2472,7 +2472,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!ifmsh->mshcfg.dot11MeshForwarding) goto out; - fwd_skb = skb_copy_expand(skb, local->tx_headroom, 0, GFP_ATOMIC); + fwd_skb = skb_copy_expand(skb, local->tx_headroom + + sdata->encrypt_headroom, 0, GFP_ATOMIC); if (!fwd_skb) { net_info_ratelimited("%s: failed to clone mesh frame\n", sdata->name); From d7f842442f766db3f39fc5d166ddcc24bf817056 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 25 Oct 2016 10:32:16 +0300 Subject: [PATCH 150/258] mac80211: fix the TID on NDPs sent as EOSP carrier In the commit below, I forgot to translate the mac80211's AC to QoS IE order. Moreover, the condition in the if was wrong. Fix both issues. This bug would hit only with clients that didn't set all the ACs as delivery enabled. Fixes: f438ceb81d4 ("mac80211: uapsd_queues is in QoS IE order") Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index b6cfcf038c11..50c309094c37 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1501,8 +1501,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* This will evaluate to 1, 3, 5 or 7. */ for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++) - if (ignored_acs & BIT(ac)) - continue; + if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac])) + break; tid = 7 - 2 * ac; ieee80211_send_null_response(sta, tid, reason, true, false); From 06f7c88c107fb469f4f1344142e80df5175c6836 Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Tue, 19 Jul 2016 19:28:56 +0300 Subject: [PATCH 151/258] cfg80211: consider VHT opmode on station update Currently, this attribute is only fetched on station addition, but not on station change. Since this info is only present in the assoc request, with full station state support in the driver it cannot be present when the station is added. Thus, add support for changing the VHT opmode on station update if done before (or while) the station is marked as associated. After this, ignore it, since it used to be ignored. Signed-off-by: Beni Lev Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- net/wireless/nl80211.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6b76e3b0c18e..bea982af9cfb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1772,7 +1772,9 @@ enum nl80211_commands { * * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode * Notification Element based on association request when used with - * %NL80211_CMD_NEW_STATION; u8 attribute. + * %NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when + * %NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS); + * u8 attribute. * * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ef5eff93a8b8..5c1b267e22be 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4615,6 +4615,15 @@ int cfg80211_check_station_change(struct wiphy *wiphy, break; } + /* + * Older kernel versions ignored this attribute entirely, so don't + * reject attempts to update it but mark it as unused instead so the + * driver won't look at the data. + */ + if (statype != CFG80211_STA_AP_CLIENT_UNASSOC && + statype != CFG80211_STA_TDLS_PEER_SETUP) + params->opmode_notif_used = false; + return 0; } EXPORT_SYMBOL(cfg80211_check_station_change); @@ -4854,6 +4863,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.local_pm = pm; } + if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { + params.opmode_notif_used = true; + params.opmode_notif = + nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); + } + /* Include parameters for TDLS peer (will check later) */ err = nl80211_set_station_tdls(info, ¶ms); if (err) From 96aa2e7cf126773b16c6c19b7474a8a38d3c707e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Oct 2016 12:23:49 +0200 Subject: [PATCH 152/258] mac80211: calculate min channel width correctly In the current minimum chandef code there's an issue in that the recalculation can happen after rate control is initialized for a station that has a wider bandwidth than the current chanctx, and then rate control can immediately start using those higher rates which could cause problems. Observe that first of all that this problem is because we don't take non-associated and non-uploaded stations into account. The restriction to non-associated is quite pointless and is one of the causes for the problem described above, since the rate init will happen before the station is set to associated; no frames could actually be sent until associated, but the rate table can already contain higher rates and that might cause problems. Also, rejecting non-uploaded stations is wrong, since the rate control can select higher rates for those as well. Secondly, it's then necessary to recalculate the minimal config before initializing rate control, so that when rate control is initialized, the higher rates are already available. This can be done easily by adding the necessary function call in rate init. Change-Id: Ib9bc02d34797078db55459d196993f39dcd43070 Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 3 --- net/mac80211/rate.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index e75cbf6ecc26..a0d901d8992e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -231,9 +231,6 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata) !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) continue; - if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC)) - continue; - max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta)); } rcu_read_unlock(); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 206698bc93f4..9e2641d45587 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -40,6 +40,8 @@ void rate_control_rate_init(struct sta_info *sta) ieee80211_sta_set_rx_nss(sta); + ieee80211_recalc_min_chandef(sta->sdata); + if (!ref) return; From d2941df8fbd9708035d66d889ada4d3d160170ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 20 Oct 2016 08:52:50 +0200 Subject: [PATCH 153/258] mac80211: recalculate min channel width on VHT opmode changes When an associated station changes its VHT operating mode this can/will affect the bandwidth it's using, and consequently we must recalculate the minimum bandwidth we need to use. Failure to do so can lead to one of two scenarios: 1) we use a too high bandwidth, this is benign 2) we use a too narrow bandwidth, causing rate control and actual PHY configuration to be out of sync, which can in turn cause problems/crashes Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 21 +++++++++++++++++++++ net/mac80211/rx.c | 9 +-------- net/mac80211/vht.c | 4 +++- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 41497b670e2b..d37ae7dc114b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -6,6 +6,7 @@ * Copyright (c) 2006 Jiri Benc * Copyright 2008, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1295,6 +1296,26 @@ static void ieee80211_iface_work(struct work_struct *work) } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_VHT) { switch (mgmt->u.action.u.vht_group_notif.action_code) { + case WLAN_VHT_ACTION_OPMODE_NOTIF: { + struct ieee80211_rx_status *status; + enum nl80211_band band; + u8 opmode; + + status = IEEE80211_SKB_RXCB(skb); + band = status->band; + opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + + if (sta) + ieee80211_vht_handle_opmode(sdata, sta, + opmode, + band); + + mutex_unlock(&local->sta_mtx); + break; + } case WLAN_VHT_ACTION_GROUPID_MGMT: ieee80211_process_mu_groups(sdata, mgmt); break; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c87e61358b77..3090dd4342f6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2881,17 +2881,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.u.vht_opmode_notif.action_code) { case WLAN_VHT_ACTION_OPMODE_NOTIF: { - u8 opmode; - /* verify opmode is present */ if (len < IEEE80211_MIN_ACTION_SIZE + 2) goto invalid; - - opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; - - ieee80211_vht_handle_opmode(rx->sdata, rx->sta, - opmode, status->band); - goto handled; + goto queue; } case WLAN_VHT_ACTION_GROUPID_MGMT: { if (len < IEEE80211_MIN_ACTION_SIZE + 25) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 6832bf6ab69f..43e45bb660bc 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -527,8 +527,10 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band); - if (changed > 0) + if (changed > 0) { + ieee80211_recalc_min_chandef(sdata); rate_control_rate_update(local, sband, sta, changed); + } } void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, From ad5013d5699d30ded0cdbbc68b93b2aa28222c6e Mon Sep 17 00:00:00 2001 From: Colin King Date: Wed, 11 Jan 2017 11:43:10 +0000 Subject: [PATCH 154/258] perf/x86/intel: Use ULL constant to prevent undefined shift behaviour When x86_pmu.num_counters is 32 the shift of the integer constant 1 is exceeding 32bit and therefor undefined behaviour. Fix this by shifting 1ULL instead of 1. Reported-by: CoverityScan CID#1192105 ("Bad bit shift operation") Signed-off-by: Colin Ian King Cc: Andi Kleen Cc: Peter Zijlstra Cc: Kan Liang Cc: Stephane Eranian Cc: Alexander Shishkin Link: http://lkml.kernel.org/r/20170111114310.17928-1-colin.king@canonical.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 86138267b68a..d611cab214a6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3987,7 +3987,7 @@ __init int intel_pmu_init(void) x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; } - x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1; if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", From d6169d04097fd9ddf811e63eae4e5cd71e6666e2 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 11 Jan 2017 17:10:34 +0200 Subject: [PATCH 155/258] xhci: fix deadlock at host remove by running watchdog correctly If a URB is killed while the host is removed we can end up in a situation where the hub thread takes the roothub device lock, and waits for the URB to be given back by xhci-hcd, blocking the host remove code. xhci-hcd tries to stop the endpoint and give back the urb, but can't as the host is removed from PCI bus at the same time, preventing the normal way of giving back urb. Instead we need to rely on the stop command timeout function to give back the urb. This xhci_stop_endpoint_command_watchdog() timeout function used a XHCI_STATE_DYING flag to indicate if the timeout function is already running, but later this flag has been taking into use in other places to mark that xhci is dying. Remove checks for XHCI_STATE_DYING in xhci_urb_dequeue. We are still checking that reading from pci state does not return 0xffffffff or that host is not halted before trying to stop the endpoint. This whole area of stopping endpoints, giving back URBs, and the wathdog timeout need rework, this fix focuses on solving a specific deadlock issue that we can then send to stable before any major rework. Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 11 ----------- drivers/usb/host/xhci.c | 13 ------------- 2 files changed, 24 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 25f522b09dd9..e32029a31ca4 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -913,17 +913,6 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) spin_lock_irqsave(&xhci->lock, flags); ep->stop_cmds_pending--; - if (xhci->xhc_state & XHCI_STATE_REMOVING) { - spin_unlock_irqrestore(&xhci->lock, flags); - return; - } - if (xhci->xhc_state & XHCI_STATE_DYING) { - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Stop EP timer ran, but another timer marked " - "xHCI as DYING, exiting."); - spin_unlock_irqrestore(&xhci->lock, flags); - return; - } if (!(ep->stop_cmds_pending == 0 && (ep->ep_state & EP_HALT_PENDING))) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Stop EP timer ran, but no command pending, " diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0c8deb9ed42d..9a0ec116654a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1534,19 +1534,6 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) xhci_urb_free_priv(urb_priv); return ret; } - if ((xhci->xhc_state & XHCI_STATE_DYING) || - (xhci->xhc_state & XHCI_STATE_HALTED)) { - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Ep 0x%x: URB %p to be canceled on " - "non-responsive xHCI host.", - urb->ep->desc.bEndpointAddress, urb); - /* Let the stop endpoint command watchdog timer (which set this - * state) finish cleaning up the endpoint TD lists. We must - * have caught it in the middle of dropping a lock and giving - * back an URB. - */ - goto done; - } ep_index = xhci_get_endpoint_index(&urb->ep->desc); ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index]; From 1392370ee7de8aa3f69936f55bea6bfcc9879c59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jan 2017 14:29:02 +0300 Subject: [PATCH 156/258] nvme-rdma: fix nvme_rdma_queue_is_ready Now that we don't abuse the cmd field in struct request for nvme command passthrough this function needs to be converted to the proper accessor as well. Fixes: d49187e97e ("nvme: introduce struct nvme_request") Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f587af345889..34e564857716 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1422,7 +1422,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq) { if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { - struct nvme_command *cmd = (struct nvme_command *)rq->cmd; + struct nvme_command *cmd = nvme_req(rq)->cmd; if (rq->cmd_type != REQ_TYPE_DRV_PRIV || cmd->common.opcode != nvme_fabrics_command || From b5a10c5f7532b7473776da87e67f8301bbc32693 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 28 Dec 2016 22:13:15 -0200 Subject: [PATCH 157/258] nvme: apply DELAY_BEFORE_CHK_RDY quirk at probe time too Commit 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter readiness") introduced a quirk to adapters that cannot read the bit NVME_CSTS_RDY right after register NVME_REG_CC is set; these adapters need a delay or else the action of reading the bit NVME_CSTS_RDY could somehow corrupt adapter's registers state and it never recovers. When this quirk was added, we checked ctrl->tagset in order to avoid quirking in probe time, supposing we would never require such delay during probe. Well, it was too optimistic; we in fact need this quirk at probe time in some cases, like after a kexec. In some experiments, after abnormal shutdown of machine (aka power cord unplug), we booted into our bootloader in Power, which is a Linux kernel, and kexec'ed into another distro. If this kexec is too quick, we end up reaching the probe of NVMe adapter in that distro when adapter is in bad state (not fully initialized on our bootloader). What happens next is that nvme_wait_ready() is unable to complete, except if the quirk is enabled. So, this patch removes the original ctrl->tagset verification in order to enable the quirk even on probe time. Fixes: 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter readiness") Reported-by: Andrew Byrne Reported-by: Jaime A. H. Gomez Reported-by: Zachary D. Myers Signed-off-by: Guilherme G. Piccoli Acked-by: Jeffrey Lien Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2fc86dc7a8df..8a3c3e32a704 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1106,12 +1106,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) if (ret) return ret; - /* Checking for ctrl->tagset is a trick to avoid sleeping on module - * load, since we only need the quirk on reset_controller. Notice - * that the HGST device needs this delay only in firmware activation - * procedure; unfortunately we have no (easy) way to verify this. - */ - if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset) + if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); return nvme_wait_ready(ctrl, cap, false); From 0a417b8dc1f10b03e8f558b8a831f07ec4c23795 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Jan 2017 10:20:04 -0800 Subject: [PATCH 158/258] xfs: Timely free truncated dirty pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 99579ccec4e2 "xfs: skip dirty pages in ->releasepage()" started to skip dirty pages in xfs_vm_releasepage() which also has the effect that if a dirty page is truncated, it does not get freed by block_invalidatepage() and is lingering in LRU list waiting for reclaim. So a simple loop like: while true; do dd if=/dev/zero of=file bs=1M count=100 rm file done will keep using more and more memory until we hit low watermarks and start pagecache reclaim which will eventually reclaim also the truncate pages. Keeping these truncated (and thus never usable) pages in memory is just a waste of memory, is unnecessarily stressing page cache reclaim, and reportedly also leads to anonymous mmap(2) returning ENOMEM prematurely. So instead of just skipping dirty pages in xfs_vm_releasepage(), return to old behavior of skipping them only if they have delalloc or unwritten buffers and fix the spurious warnings by warning only if the page is clean. CC: stable@vger.kernel.org CC: Brian Foster CC: Vlastimil Babka Reported-by: Petr Tůma Fixes: 99579ccec4e271c3d4d4e7c946058766812afdab Signed-off-by: Jan Kara Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0f56fcd3a5d5..631e7c0e0a29 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1152,19 +1152,22 @@ xfs_vm_releasepage( * block_invalidatepage() can send pages that are still marked dirty * but otherwise have invalidated buffers. * - * We've historically freed buffers on the latter. Instead, quietly - * filter out all dirty pages to avoid spurious buffer state warnings. - * This can likely be removed once shrink_active_list() is fixed. + * We want to release the latter to avoid unnecessary buildup of the + * LRU, skip the former and warn if we've left any lingering + * delalloc/unwritten buffers on clean pages. Skip pages with delalloc + * or unwritten buffers and warn if the page is not dirty. Otherwise + * try to release the buffers. */ - if (PageDirty(page)) - return 0; - xfs_count_page_state(page, &delalloc, &unwritten); - if (WARN_ON_ONCE(delalloc)) + if (delalloc) { + WARN_ON_ONCE(!PageDirty(page)); return 0; - if (WARN_ON_ONCE(unwritten)) + } + if (unwritten) { + WARN_ON_ONCE(!PageDirty(page)); return 0; + } return try_to_free_buffers(page); } From 6ed0993a0b859ce62edf2930ded683e452286d39 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 7 Jan 2017 09:27:49 +0300 Subject: [PATCH 159/258] vfio-mdev: return -EFAULT if copy_to_user() fails The copy_to_user() function returns the number of bytes which it wasn't able to copy but we want to return a negative error code. Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.") Signed-off-by: Dan Carpenter Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- samples/vfio-mdev/mtty.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 1fc57a5093a7..975af5bbf28d 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1180,7 +1180,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, memcpy(&mdev_state->dev_info, &info, sizeof(info)); - return copy_to_user((void __user *)arg, &info, minsz); + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; } case VFIO_DEVICE_GET_REGION_INFO: { @@ -1201,7 +1204,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, if (ret) return ret; - return copy_to_user((void __user *)arg, &info, minsz); + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; } case VFIO_DEVICE_GET_IRQ_INFO: @@ -1224,7 +1230,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.count == -1) return -EINVAL; - return copy_to_user((void __user *)arg, &info, minsz); + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; } case VFIO_DEVICE_SET_IRQS: { From 5c677869e0abbffbade2cfd82d46d0eebe823f34 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 7 Jan 2017 09:28:40 +0300 Subject: [PATCH 160/258] vfio-mdev: buffer overflow in ioctl() This is a sample driver for documentation so the impact is probably pretty low. But we should check that bar_index is valid so we don't write beyond the end of the mdev_state->region_info[] array. Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.") Signed-off-by: Dan Carpenter Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- samples/vfio-mdev/mtty.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 975af5bbf28d..382f4797428f 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1073,7 +1073,7 @@ int mtty_get_region_info(struct mdev_device *mdev, { unsigned int size = 0; struct mdev_state *mdev_state; - int bar_index; + u32 bar_index; if (!mdev) return -EINVAL; @@ -1082,8 +1082,11 @@ int mtty_get_region_info(struct mdev_device *mdev, if (!mdev_state) return -EINVAL; - mutex_lock(&mdev_state->ops_lock); bar_index = region_info->index; + if (bar_index >= VFIO_PCI_NUM_REGIONS) + return -EINVAL; + + mutex_lock(&mdev_state->ops_lock); switch (bar_index) { case VFIO_PCI_CONFIG_REGION_INDEX: From 73da4268fdbae972f617946d1c690f2136964802 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 7 Jan 2017 09:30:08 +0300 Subject: [PATCH 161/258] vfio-mdev: remove some dead code We set info.count to 1 in mtty_get_irq_info() so static checkers complain that, "Why do we have impossible conditions?" The answer is that it seems to be left over dead code that can be safely removed. Signed-off-by: Dan Carpenter Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- samples/vfio-mdev/mtty.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 382f4797428f..ca495686b9c3 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1230,9 +1230,6 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd, if (ret) return ret; - if (info.count == -1) - return -EINVAL; - if (copy_to_user((void __user *)arg, &info, minsz)) return -EFAULT; From a89af4abdf9b353cdd6f61afc0eaaac403304873 Mon Sep 17 00:00:00 2001 From: Brendan McGrath Date: Sat, 7 Jan 2017 08:01:38 +1100 Subject: [PATCH 162/258] HID: i2c-hid: Add sleep between POWER ON and RESET Support for the Asus Touchpad was recently added. It turns out this device can fail initialisation (and become unusable) when the RESET command is sent too soon after the POWER ON command. Unfortunately the i2c-hid specification does not specify the need for a delay between these two commands. But it was discovered the Windows driver has a 1ms delay. As a result, this patch modifies the i2c-hid module to add a sleep inbetween the POWER ON and RESET commands which lasts between 1ms and 5ms. See https://github.com/vlasenko/hid-asus-dkms/issues/24 for further details. Signed-off-by: Brendan McGrath Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 78fb32a7b103..ea3c3546cef7 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -426,6 +426,15 @@ static int i2c_hid_hwreset(struct i2c_client *client) if (ret) goto out_unlock; + /* + * The HID over I2C specification states that if a DEVICE needs time + * after the PWR_ON request, it should utilise CLOCK stretching. + * However, it has been observered that the Windows driver provides a + * 1ms sleep between the PWR_ON and RESET requests and that some devices + * rely on this. + */ + usleep_range(1000, 5000); + i2c_hid_dbg(ihid, "resetting...\n"); ret = i2c_hid_command(client, &hid_reset_cmd, NULL, 0); From 19c0f40d4fca3a47b8f784a627f0467f0138ccc8 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 11 Jan 2017 16:25:34 +0800 Subject: [PATCH 163/258] r8152: fix the sw rx checksum is unavailable Fix the hw rx checksum is always enabled, and the user couldn't switch it to sw rx checksum. Note that the RTL_VER_01 only support sw rx checksum only. Besides, the hw rx checksum for RTL_VER_02 is disabled after commit b9a321b48af4 ("r8152: Fix broken RX checksums."). Re-enable it. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index be418563cb18..f3b48ad90865 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) u8 checksum = CHECKSUM_NONE; u32 opts2, opts3; - if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02) + if (!(tp->netdev->features & NETIF_F_RXCSUM)) goto return_result; opts2 = le32_to_cpu(rx_desc->opts2); @@ -4356,6 +4356,11 @@ static int rtl8152_probe(struct usb_interface *intf, NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM | NETIF_F_TSO6; + if (tp->version == RTL_VER_01) { + netdev->features &= ~NETIF_F_RXCSUM; + netdev->hw_features &= ~NETIF_F_RXCSUM; + } + netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); From 4ecb1d83f6abe8d49163427f4d431ebe98f8bd5f Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Wed, 11 Jan 2017 15:18:53 +0000 Subject: [PATCH 164/258] vxlan: Set ports in flow key when doing route lookups Otherwise, a xfrm policy with sport/dport being set cannot be matched. Signed-off-by: Martynas Pumputis Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index bb70dd5723b5..ca7196c40060 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1798,7 +1798,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev, struct vxlan_sock *sock4, struct sk_buff *skb, int oif, u8 tos, - __be32 daddr, __be32 *saddr, + __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { @@ -1824,6 +1824,8 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device fl4.flowi4_proto = IPPROTO_UDP; fl4.daddr = daddr; fl4.saddr = *saddr; + fl4.fl4_dport = dport; + fl4.fl4_sport = sport; rt = ip_route_output_key(vxlan->net, &fl4); if (likely(!IS_ERR(rt))) { @@ -1851,6 +1853,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, __be32 label, const struct in6_addr *daddr, struct in6_addr *saddr, + __be16 dport, __be16 sport, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { @@ -1877,6 +1880,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_dport = dport; + fl6.fl6_sport = sport; err = ipv6_stub->ipv6_dst_lookup(vxlan->net, sock6->sock->sk, @@ -2068,6 +2073,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rdst ? rdst->remote_ifindex : 0, tos, dst->sin.sin_addr.s_addr, &src->sin.sin_addr.s_addr, + dst_port, src_port, dst_cache, info); if (IS_ERR(rt)) { err = PTR_ERR(rt); @@ -2104,6 +2110,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rdst ? rdst->remote_ifindex : 0, tos, label, &dst->sin6.sin6_addr, &src->sin6.sin6_addr, + dst_port, src_port, dst_cache, info); if (IS_ERR(ndst)) { err = PTR_ERR(ndst); @@ -2430,7 +2437,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, info->key.u.ipv4.dst, - &info->key.u.ipv4.src, NULL, info); + &info->key.u.ipv4.src, dport, sport, NULL, info); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); @@ -2441,7 +2448,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos, info->key.label, &info->key.u.ipv6.dst, - &info->key.u.ipv6.src, NULL, info); + &info->key.u.ipv6.src, dport, sport, NULL, info); if (IS_ERR(ndst)) return PTR_ERR(ndst); dst_release(ndst); From 0719e72ccb801829a3d735d187ca8417f0930459 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 11 Jan 2017 09:16:32 -0800 Subject: [PATCH 165/258] netvsc: add rcu_read locking to netvsc callback The receive callback (in tasklet context) is using RCU to get reference to associated VF network device but this is not safe. RCU read lock needs to be held. Found by running with full lockdep debugging enabled. Fixes: f207c10d9823 ("hv_netvsc: use RCU to protect vf_netdev") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c9414c054852..fcab8019dda0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -659,6 +659,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, * policy filters on the host). Deliver these via the VF * interface in the guest. */ + rcu_read_lock(); vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); if (vf_netdev && (vf_netdev->flags & IFF_UP)) net = vf_netdev; @@ -667,6 +668,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); if (unlikely(!skb)) { ++net->stats.rx_dropped; + rcu_read_unlock(); return NVSP_STAT_FAIL; } @@ -696,6 +698,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, * TODO - use NAPI? */ netif_rx(skb); + rcu_read_unlock(); return 0; } From 900742d89c1b4e04bd373aec8470b88e183f08ca Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Jan 2017 12:00:22 -0600 Subject: [PATCH 166/258] x86/unwind: Silence warnings for non-current tasks There are a handful of callers to save_stack_trace_tsk() and show_stack() which try to unwind the stack of a task other than current. In such cases, it's remotely possible that the task is running on one CPU while the unwinder is reading its stack from another CPU, causing the unwinder to see stack corruption. These cases seem to be mostly harmless. The unwinder has checks which prevent it from following bad pointers beyond the bounds of the stack. So it's not really a bug as long as the caller understands that unwinding another task will not always succeed. Since stack "corruption" on another task's stack isn't necessarily a bug, silence the warnings when unwinding tasks other than current. Reported-by: Dave Jones Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Miroslav Benes Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/00d8c50eea3446c1524a2a755397a3966629354c.1483978430.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_frame.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 4443e499f279..195eebf6da20 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -207,6 +207,16 @@ bool unwind_next_frame(struct unwind_state *state) return true; bad_address: + /* + * When unwinding a non-current task, the task might actually be + * running on another CPU, in which case it could be modifying its + * stack while we're reading it. This is generally not a problem and + * can be ignored as long as the caller understands that unwinding + * another task will not always succeed. + */ + if (state->task != current) + goto the_end; + if (state->regs) { printk_deferred_once(KERN_WARNING "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", From 84936118bdf37bda513d4a361c38181a216427e0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Jan 2017 12:00:23 -0600 Subject: [PATCH 167/258] x86/unwind: Disable KASAN checks for non-current tasks There are a handful of callers to save_stack_trace_tsk() and show_stack() which try to unwind the stack of a task other than current. In such cases, it's remotely possible that the task is running on one CPU while the unwinder is reading its stack from another CPU, causing the unwinder to see stack corruption. These cases seem to be mostly harmless. The unwinder has checks which prevent it from following bad pointers beyond the bounds of the stack. So it's not really a bug as long as the caller understands that unwinding another task will not always succeed. In such cases, it's possible that the unwinder may read a KASAN-poisoned region of the stack. Account for that by using READ_ONCE_NOCHECK() when reading the stack of another task. Use READ_ONCE() when reading the stack of the current task, since KASAN warnings can still be useful for finding bugs in that case. Reported-by: Dmitry Vyukov Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Miroslav Benes Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4c575eb288ba9f73d498dfe0acde2f58674598f1.1483978430.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 5 ++++- arch/x86/kernel/unwind_frame.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index a3269c897ec5..20ce3db20f24 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -52,13 +52,16 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len) static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { + struct inactive_task_frame *frame; + if (regs) return (unsigned long *)regs->bp; if (task == current) return __builtin_frame_address(0); - return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp; + frame = (struct inactive_task_frame *)task->thread.sp; + return (unsigned long *)READ_ONCE_NOCHECK(frame->bp); } #else static inline unsigned long * diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 195eebf6da20..23d15565d02a 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -6,6 +6,21 @@ #define FRAME_HEADER_SIZE (sizeof(long) * 2) +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + if (task == current) \ + val = READ_ONCE(x); \ + else \ + val = READ_ONCE_NOCHECK(x); \ + val; \ +}) + static void unwind_dump(struct unwind_state *state, unsigned long *sp) { static bool dumped_before = false; @@ -48,7 +63,8 @@ unsigned long unwind_get_return_address(struct unwind_state *state) if (state->regs && user_mode(state->regs)) return 0; - addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p, + addr = READ_ONCE_TASK_STACK(state->task, *addr_p); + addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); return __kernel_text_address(addr) ? addr : 0; @@ -162,7 +178,7 @@ bool unwind_next_frame(struct unwind_state *state) if (state->regs) next_bp = (unsigned long *)state->regs->bp; else - next_bp = (unsigned long *)*state->bp; + next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp); /* is the next frame pointer an encoded pointer to pt_regs? */ regs = decode_frame_pointer(next_bp); From 2c96b2fe9c57b4267c3f0a680d82d7cc52e1c447 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Jan 2017 12:00:24 -0600 Subject: [PATCH 168/258] x86/unwind: Include __schedule() in stack traces In the following commit: 0100301bfdf5 ("sched/x86: Rewrite the switch_to() code") ... the layout of the 'inactive_task_frame' struct was designed to have a frame pointer header embedded in it, so that the unwinder could use the 'bp' and 'ret_addr' fields to report __schedule() on the stack (or ret_from_fork() for newly forked tasks which haven't actually run yet). Finish the job by changing get_frame_pointer() to return a pointer to inactive_task_frame's 'bp' field rather than 'bp' itself. This allows the unwinder to start one frame higher on the stack, so that it properly reports __schedule(). Reported-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/598e9f7505ed0aba86e8b9590aa528c6c7ae8dcd.1483978430.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 5 +---- arch/x86/include/asm/switch_to.h | 10 +++++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 20ce3db20f24..2e41c50ddf47 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -52,16 +52,13 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len) static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { - struct inactive_task_frame *frame; - if (regs) return (unsigned long *)regs->bp; if (task == current) return __builtin_frame_address(0); - frame = (struct inactive_task_frame *)task->thread.sp; - return (unsigned long *)READ_ONCE_NOCHECK(frame->bp); + return &((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long * diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 5cb436acd463..fcc5cd387fd1 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -36,7 +36,10 @@ static inline void prepare_switch_to(struct task_struct *prev, asmlinkage void ret_from_fork(void); -/* data that is pointed to by thread.sp */ +/* + * This is the structure pointed to by thread.sp for an inactive task. The + * order of the fields must match the code in __switch_to_asm(). + */ struct inactive_task_frame { #ifdef CONFIG_X86_64 unsigned long r15; @@ -48,6 +51,11 @@ struct inactive_task_frame { unsigned long di; #endif unsigned long bx; + + /* + * These two fields must be together. They form a stack frame header, + * needed by get_frame_pointer(). + */ unsigned long bp; unsigned long ret_addr; }; From ff3f7e2475bbf9201e95824e72698fcdc5c3d47a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Jan 2017 12:00:25 -0600 Subject: [PATCH 169/258] x86/entry: Fix the end of the stack for newly forked tasks When unwinding a task, the end of the stack is always at the same offset right below the saved pt_regs, regardless of which syscall was used to enter the kernel. That convention allows the unwinder to verify that a stack is sane. However, newly forked tasks don't always follow that convention, as reported by the following unwinder warning seen by Dave Jones: WARNING: kernel stack frame pointer at ffffc90001443f30 in kworker/u8:8:30468 has bad value (null) The warning was due to the following call chain: (ftrace handler) call_usermodehelper_exec_async+0x5/0x140 ret_from_fork+0x22/0x30 The problem is that ret_from_fork() doesn't create a stack frame before calling other functions. Fix that by carefully using the frame pointer macros. In addition to conforming to the end of stack convention, this also makes related stack traces more sensible by making it clear to the user that ret_from_fork() was involved. Reported-by: Dave Jones Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Miroslav Benes Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8854cdaab980e9700a81e9ebf0d4238e4bbb68ef.1483978430.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 30 +++++++++++------------------- arch/x86/entry/entry_64.S | 11 +++++++---- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 701d29f8e4d3..57f7ec35216e 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -254,23 +254,6 @@ ENTRY(__switch_to_asm) jmp __switch_to END(__switch_to_asm) -/* - * The unwinder expects the last frame on the stack to always be at the same - * offset from the end of the page, which allows it to validate the stack. - * Calling schedule_tail() directly would break that convention because its an - * asmlinkage function so its argument has to be pushed on the stack. This - * wrapper creates a proper "end of stack" frame header before the call. - */ -ENTRY(schedule_tail_wrapper) - FRAME_BEGIN - - pushl %eax - call schedule_tail - popl %eax - - FRAME_END - ret -ENDPROC(schedule_tail_wrapper) /* * A newly forked process directly context switches into this address. * @@ -279,15 +262,24 @@ ENDPROC(schedule_tail_wrapper) * edi: kernel thread arg */ ENTRY(ret_from_fork) - call schedule_tail_wrapper + FRAME_BEGIN /* help unwinder find end of stack */ + + /* + * schedule_tail() is asmlinkage so we have to put its 'prev' argument + * on the stack. + */ + pushl %eax + call schedule_tail + popl %eax testl %ebx, %ebx jnz 1f /* kernel threads are uncommon */ 2: /* When we fork, we trace the syscall return in the child, too. */ - movl %esp, %eax + leal FRAME_OFFSET(%esp), %eax call syscall_return_slowpath + FRAME_END jmp restore_all /* kernel thread */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5b219707c2f2..044d18ebc43c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,6 +36,7 @@ #include #include #include +#include #include .code64 @@ -408,17 +409,19 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) + FRAME_BEGIN /* help unwinder find end of stack */ movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ + call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ 2: - movq %rsp, %rdi + leaq FRAME_OFFSET(%rsp),%rdi /* pt_regs pointer */ call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS + FRAME_END jmp restore_regs_and_iret 1: From e1d070c3793a2766122865a7c2142853b48808c5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2016 00:19:19 +0100 Subject: [PATCH 170/258] mmc: sdhci-acpi: Only powered up enabled acpi child devices Commit e5bbf30733f9 ("mmc: sdhci-acpi: Ensure connected devices are powered when probing") introduced code to powerup any acpi child nodes listed in the dstd. But some dstd-s list all possible devices used on some board variants, while reporting if the device is actually present and enabled in the status field of the device. So we end up calling the acpi _PS0 (power-on) method for devices which are not actually present. This does not always end well, e.g. on my cube iwork8 air tablet, this results in freezing the entire tablet as soon as the r8723bs module is loaded. This commit fixes this by checking the child device's status.present and status.enabled bits and only call acpi_device_fix_up_power() if both are set. Fixes: e5bbf30733f9 ("mmc: sdhci-acpi: Ensure connected devices are powered when probing") BugLink: https://github.com/hadess/rtl8723bs/issues/80 Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 160f695cc09c..278a5a435ab7 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -395,7 +395,8 @@ static int sdhci_acpi_probe(struct platform_device *pdev) /* Power on the SDHCI controller and its children */ acpi_device_fix_up_power(device); list_for_each_entry(child, &device->children, node) - acpi_device_fix_up_power(child); + if (child->status.present && child->status.enabled) + acpi_device_fix_up_power(child); if (acpi_bus_get_status(device) || !device->status.present) return -ENODEV; From 01167c7b9cbf099c69fe411a228e4e9c7104e123 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 5 Jan 2017 19:24:04 +0000 Subject: [PATCH 171/258] mmc: mxs-mmc: Fix additional cycles after transmission stop According to the code the intention is to append 8 SCK cycles instead of 4 at end of a MMC_STOP_TRANSMISSION command. But this will never happened because it's an AC command not an ADTC command. So fix this by moving the statement into the right function. Signed-off-by: Stefan Wahren Fixes: e4243f13d10e (mmc: mxs-mmc: add mmc host driver for i.MX23/28) Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/mxs-mmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 44ecebd1ea8c..c8b8ac66ff7e 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -309,6 +309,9 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host) cmd0 = BF_SSP(cmd->opcode, CMD0_CMD); cmd1 = cmd->arg; + if (cmd->opcode == MMC_STOP_TRANSMISSION) + cmd0 |= BM_SSP_CMD0_APPEND_8CYC; + if (host->sdio_irq_en) { ctrl0 |= BM_SSP_CTRL0_SDIO_IRQ_CHECK; cmd0 |= BM_SSP_CMD0_CONT_CLKING_EN | BM_SSP_CMD0_SLOW_CLKING_EN; @@ -417,8 +420,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host) ssp->base + HW_SSP_BLOCK_SIZE); } - if ((cmd->opcode == MMC_STOP_TRANSMISSION) || - (cmd->opcode == SD_IO_RW_EXTENDED)) + if (cmd->opcode == SD_IO_RW_EXTENDED) cmd0 |= BM_SSP_CMD0_APPEND_8CYC; cmd1 = cmd->arg; From b6416e61012429e0277bd15a229222fd17afc1c1 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 16 Dec 2016 14:30:35 -0800 Subject: [PATCH 172/258] jump_labels: API for flushing deferred jump label updates Modules that use static_key_deferred need a way to synchronize with any delayed work that is still pending when the module is unloaded. Introduce static_key_deferred_flush() which flushes any pending jump label updates. Signed-off-by: David Matlack Cc: stable@vger.kernel.org Acked-by: Peter Zijlstra (Intel) Signed-off-by: Paolo Bonzini --- include/linux/jump_label_ratelimit.h | 5 +++++ kernel/jump_label.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 089f70f83e97..23da3af459fe 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -14,6 +14,7 @@ struct static_key_deferred { #ifdef HAVE_JUMP_LABEL extern void static_key_slow_dec_deferred(struct static_key_deferred *key); +extern void static_key_deferred_flush(struct static_key_deferred *key); extern void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); @@ -26,6 +27,10 @@ static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) STATIC_KEY_CHECK_USE(); static_key_slow_dec(&key->key); } +static inline void static_key_deferred_flush(struct static_key_deferred *key) +{ + STATIC_KEY_CHECK_USE(); +} static inline void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 93ad6c1fb9b6..a9b8cf500591 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -182,6 +182,13 @@ void static_key_slow_dec_deferred(struct static_key_deferred *key) } EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); +void static_key_deferred_flush(struct static_key_deferred *key) +{ + STATIC_KEY_CHECK_USE(); + flush_delayed_work(&key->work); +} +EXPORT_SYMBOL_GPL(static_key_deferred_flush); + void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { From cef84c302fe051744b983a92764d3fcca933415d Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 16 Dec 2016 14:30:36 -0800 Subject: [PATCH 173/258] KVM: x86: flush pending lapic jump label updates on module unload KVM's lapic emulation uses static_key_deferred (apic_{hw,sw}_disabled). These are implemented with delayed_work structs which can still be pending when the KVM module is unloaded. We've seen this cause kernel panics when the kvm_intel module is quickly reloaded. Use the new static_key_deferred_flush() API to flush pending updates on module unload. Signed-off-by: David Matlack Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 6 ++++++ arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/x86.c | 1 + 3 files changed, 8 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5fe290c1b7d8..2f6ef5121a4c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2426,3 +2426,9 @@ void kvm_lapic_init(void) jump_label_rate_limit(&apic_hw_disabled, HZ); jump_label_rate_limit(&apic_sw_disabled, HZ); } + +void kvm_lapic_exit(void) +{ + static_key_deferred_flush(&apic_hw_disabled); + static_key_deferred_flush(&apic_sw_disabled); +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e0c80233b3e1..ff8039d61672 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -110,6 +110,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); void kvm_lapic_init(void); +void kvm_lapic_exit(void); #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f22810a7e0c..a0ac6e0060fb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6045,6 +6045,7 @@ out: void kvm_arch_exit(void) { + kvm_lapic_exit(); perf_unregister_guest_info_callbacks(&kvm_guest_cbs); if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) From 129a72a0d3c8e139a04512325384fe5ac119e74d Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 11 Jan 2017 18:28:29 -0800 Subject: [PATCH 174/258] KVM: x86: Introduce segmented_write_std Introduces segemented_write_std. Switches from emulated reads/writes to standard read/writes in fxsave, fxrstor, sgdt, and sidt. This fixes CVE-2017-2584, a longstanding kernel memory leak. Since commit 283c95d0e389 ("KVM: x86: emulate FXSAVE and FXRSTOR", 2016-11-09), which is luckily not yet in any final release, this would also be an exploitable kernel memory *write*! Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: 96051572c819194c37a8367624b285be10297eca Fixes: 283c95d0e3891b64087706b344a4b545d04a6e62 Suggested-by: Paolo Bonzini Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 56628a44668b..f36d0fa6b885 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -818,6 +818,20 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } +static int segmented_write_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned int size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); +} + /* * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. @@ -3685,8 +3699,8 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, } /* Disable writeback. */ ctxt->dst.type = OP_NONE; - return segmented_write(ctxt, ctxt->dst.addr.mem, - &desc_ptr, 2 + ctxt->op_bytes); + return segmented_write_std(ctxt, ctxt->dst.addr.mem, + &desc_ptr, 2 + ctxt->op_bytes); } static int em_sgdt(struct x86_emulate_ctxt *ctxt) @@ -3932,7 +3946,7 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) else size = offsetof(struct fxregs_state, xmm_space[0]); - return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size); + return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); } static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, @@ -3974,7 +3988,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512); if (rc != X86EMUL_CONTINUE) return rc; From 4f3dbdf47e150016aacd734e663347fcaa768303 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 5 Jan 2017 17:39:42 -0800 Subject: [PATCH 175/258] KVM: eventfd: fix NULL deref irqbypass consumer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported syzkaller: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] PGD 0 Oops: 0002 [#1] SMP CPU: 1 PID: 125 Comm: kworker/1:1 Not tainted 4.9.0+ #1 Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm] task: ffff9bbe0dfbb900 task.stack: ffffb61802014000 RIP: 0010:irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] Call Trace: irqfd_shutdown+0x66/0xa0 [kvm] process_one_work+0x16b/0x480 worker_thread+0x4b/0x500 kthread+0x101/0x140 ? process_one_work+0x480/0x480 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x25/0x30 RIP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] RSP: ffffb61802017e20 CR2: 0000000000000008 The syzkaller folks reported a NULL pointer dereference that due to unregister an consumer which fails registration before. The syzkaller creates two VMs w/ an equal eventfd occasionally. So the second VM fails to register an irqbypass consumer. It will make irqfd as inactive and queue an workqueue work to shutdown irqfd and unregister the irqbypass consumer when eventfd is closed. However, the second consumer has been initialized though it fails registration. So the token(same as the first VM's) is taken to unregister the consumer through the workqueue, the consumer of the first VM is found and unregistered, then NULL deref incurred in the path of deleting consumer from the consumers list. This patch fixes it by making irq_bypass_register/unregister_consumer() looks for the consumer entry based on consumer pointer itself instead of token matching. Reported-by: Dmitry Vyukov Suggested-by: Alex Williamson Cc: stable@vger.kernel.org Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: Alex Williamson Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- virt/lib/irqbypass.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 52abac4bb6a2..6d2fcd6fcb25 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token == consumer->token) { + if (tmp->token == consumer->token || tmp == consumer) { mutex_unlock(&lock); module_put(THIS_MODULE); return -EBUSY; @@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token != consumer->token) + if (tmp != consumer) continue; list_for_each_entry(producer, &producers, node) { From 546d87e5c903a7f3ee7b9f998949a94729fbc65b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 3 Jan 2017 18:56:19 -0800 Subject: [PATCH 176/258] KVM: x86: fix NULL deref in vcpu_scan_ioapic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 00000000000001b0 IP: _raw_spin_lock+0xc/0x30 PGD 3e28eb067 PUD 3f0ac6067 PMD 0 Oops: 0002 [#1] SMP CPU: 0 PID: 2431 Comm: test Tainted: G OE 4.10.0-rc1+ #3 Call Trace: ? kvm_ioapic_scan_entry+0x3e/0x110 [kvm] kvm_arch_vcpu_ioctl_run+0x10a8/0x15f0 [kvm] ? pick_next_task_fair+0xe1/0x4e0 ? kvm_arch_vcpu_load+0xea/0x260 [kvm] kvm_vcpu_ioctl+0x33a/0x600 [kvm] ? hrtimer_try_to_cancel+0x29/0x130 ? do_nanosleep+0x97/0xf0 do_vfs_ioctl+0xa1/0x5d0 ? __hrtimer_init+0x90/0x90 ? do_nanosleep+0x5b/0xf0 SyS_ioctl+0x79/0x90 do_syscall_64+0x6e/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: _raw_spin_lock+0xc/0x30 RSP: ffffa43688973cc0 The syzkaller folks reported a NULL pointer dereference due to ENABLE_CAP succeeding even without an irqchip. The Hyper-V synthetic interrupt controller is activated, resulting in a wrong request to rescan the ioapic and a NULL pointer dereference. #include #include #include #include #include #include #include #include #include #include #ifndef KVM_CAP_HYPERV_SYNIC #define KVM_CAP_HYPERV_SYNIC 123 #endif void* thr(void* arg) { struct kvm_enable_cap cap; cap.flags = 0; cap.cap = KVM_CAP_HYPERV_SYNIC; ioctl((long)arg, KVM_ENABLE_CAP, &cap); return 0; } int main() { void *host_mem = mmap(0, 0x1000, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); int kvmfd = open("/dev/kvm", 0); int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); struct kvm_userspace_memory_region memreg; memreg.slot = 0; memreg.flags = 0; memreg.guest_phys_addr = 0; memreg.memory_size = 0x1000; memreg.userspace_addr = (unsigned long)host_mem; host_mem[0] = 0xf4; ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg); int cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); struct kvm_sregs sregs; ioctl(cpufd, KVM_GET_SREGS, &sregs); sregs.cr0 = 0; sregs.cr4 = 0; sregs.efer = 0; sregs.cs.selector = 0; sregs.cs.base = 0; ioctl(cpufd, KVM_SET_SREGS, &sregs); struct kvm_regs regs = { .rflags = 2 }; ioctl(cpufd, KVM_SET_REGS, ®s); ioctl(vmfd, KVM_CREATE_IRQCHIP, 0); pthread_t th; pthread_create(&th, 0, thr, (void*)(long)cpufd); usleep(rand() % 10000); ioctl(cpufd, KVM_RUN, 0); pthread_join(th, 0); return 0; } This patch fixes it by failing ENABLE_CAP if without an irqchip. Reported-by: Dmitry Vyukov Fixes: 5c919412fe61 (kvm/x86: Hyper-V synthetic interrupt controller) Cc: stable@vger.kernel.org # 4.5+ Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a0ac6e0060fb..57d8a856cdc5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3342,6 +3342,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, switch (cap->cap) { case KVM_CAP_HYPERV_SYNIC: + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; return kvm_hv_activate_synic(vcpu); default: return -EINVAL; From 19c816e8e455f58da9997e4c6626f06203d8fbf0 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 12 Jan 2017 16:52:02 +0800 Subject: [PATCH 177/258] capability: export has_capability has_capability() is sometimes needed by modules to test capability for specified task other than current, so export it. Cc: Kirti Wankhede Signed-off-by: Jike Song Acked-by: Serge Hallyn Acked-by: James Morris Signed-off-by: Alex Williamson --- kernel/capability.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/capability.c b/kernel/capability.c index a98e814f216f..f97fe77ceb88 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -318,6 +318,7 @@ bool has_capability(struct task_struct *t, int cap) { return has_ns_capability(t, &init_user_ns, cap); } +EXPORT_SYMBOL(has_capability); /** * has_ns_capability_noaudit - Does a task have a capability (unaudited) From 33ab91103b3415e12457e3104f0e4517ce12d0f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Jan 2017 15:02:32 +0100 Subject: [PATCH 178/258] KVM: x86: fix emulation of "MOV SS, null selector" This is CVE-2017-2583. On Intel this causes a failed vmentry because SS's type is neither 3 nor 7 (even though the manual says this check is only done for usable SS, and the dmesg splat says that SS is unusable!). On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb. The fix fabricates a data segment descriptor when SS is set to a null selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb. Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3; this in turn ensures CPL < 3 because RPL must be equal to CPL. Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing the bug and deciphering the manuals. Reported-by: Xiaohan Zhang Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011 Cc: stable@nongnu.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f36d0fa6b885..cedbba0f3402 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1585,7 +1585,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, &ctxt->exception); } -/* Does not support long mode */ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1622,20 +1621,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; - /* NULL selector is not valid for TR, CS and SS (except for long mode) */ - if ((seg == VCPU_SREG_CS - || (seg == VCPU_SREG_SS - && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) - || seg == VCPU_SREG_TR) - && null_selector) - goto exception; - /* TR should be in GDT only */ if (seg == VCPU_SREG_TR && (selector & (1 << 2))) goto exception; - if (null_selector) /* for NULL selector skip all following checks */ + /* NULL selector is not valid for TR, CS and (except for long mode) SS */ + if (null_selector) { + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR) + goto exception; + + if (seg == VCPU_SREG_SS) { + if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl) + goto exception; + + /* + * ctxt->ops->set_segment expects the CPL to be in + * SS.DPL, so fake an expand-up 32-bit data segment. + */ + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = cpl; + seg_desc.d = 1; + seg_desc.g = 1; + } + + /* Skip all following checks */ goto load; + } ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); if (ret != X86EMUL_CONTINUE) @@ -1751,6 +1764,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg) { u8 cpl = ctxt->ops->cpl(ctxt); + + /* + * None of MOV, POP and LSS can load a NULL selector in CPL=3, but + * they can load it at CPL<3 (Intel's manual says only LSS can, + * but it's wrong). + * + * However, the Intel manual says that putting IST=1/DPL=3 in + * an interrupt gate will result in SS=3 (the AMD manual instead + * says it doesn't), so allow SS=3 in __load_segment_descriptor + * and only forbid it here. + */ + if (seg == VCPU_SREG_SS && selector == 3 && + ctxt->mode == X86EMUL_MODE_PROT64) + return emulate_exception(ctxt, GP_VECTOR, 0, true); + return __load_segment_descriptor(ctxt, selector, seg, cpl, X86_TRANSFER_NONE, NULL); } From 36bf38d158d3482119b3e159c0619b3c1539b508 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 12 Jan 2017 09:10:37 +0100 Subject: [PATCH 179/258] mlxsw: spectrum: Fix memory leak at skb reallocation During transmission the skb is checked for headroom in order to add vendor specific header. In case the skb needs to be re-allocated, skb_realloc_headroom() is called to make a private copy of the original, but doesn't release it. Current code assumes that the original skb is released during reallocation and only releases it at the error path which causes a memory leak. Fix this by adding the original skb release to the main path. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Arkadi Sharshevsky Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d768c7b6c6d6..003093abb170 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -684,6 +684,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb_orig); return NETDEV_TX_OK; } + dev_consume_skb_any(skb_orig); } if (eth_skb_pad(skb)) { From 400fc0106dd8c27ed84781c929c1a184785b9c79 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 12 Jan 2017 09:10:38 +0100 Subject: [PATCH 180/258] mlxsw: switchx2: Fix memory leak at skb reallocation During transmission the skb is checked for headroom in order to add vendor specific header. In case the skb needs to be re-allocated, skb_realloc_headroom() is called to make a private copy of the original, but doesn't release it. Current code assumes that the original skb is released during reallocation and only releases it at the error path which causes a memory leak. Fix this by adding the original skb release to the main path. Fixes: d003462a50de ("mlxsw: Simplify mlxsw_sx_port_xmit function") Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 150ccf5192a9..2e88115e8735 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -345,6 +345,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb_orig); return NETDEV_TX_OK; } + dev_consume_skb_any(skb_orig); } mlxsw_sx_txhdr_construct(skb, &tx_info); /* TX header is consumed by HW on the way so we shouldn't count its From 28e46a0f2e03ab4ed0e23cace1ea89a68c8c115b Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Thu, 12 Jan 2017 09:10:39 +0100 Subject: [PATCH 181/258] mlxsw: pci: Fix EQE structure definition The event_data starts from address 0x00-0x0C and not from 0x08-0x014. This leads to duplication with other fields in the Event Queue Element such as sub-type, cqn and owner. Fixes: eda6500a987a0 ("mlxsw: Add PCI bus implementation") Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index d147ddd97997..0af3338bfcb4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -209,21 +209,21 @@ MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1); /* pci_eqe_cmd_token * Command completion event - token */ -MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16); +MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16); /* pci_eqe_cmd_status * Command completion event - status */ -MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8); +MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8); /* pci_eqe_cmd_out_param_h * Command completion event - output parameter - higher part */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32); +MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32); /* pci_eqe_cmd_out_param_l * Command completion event - output parameter - lower part */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32); +MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32); #endif From f99e86485cc32cd16e5cc97f9bb0474f28608d84 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 12 Jan 2017 07:58:32 -0700 Subject: [PATCH 182/258] block: Rename blk_queue_zone_size and bdev_zone_size All block device data fields and functions returning a number of 512B sectors are by convention named xxx_sectors while names in the form xxx_size are generally used for a number of bytes. The blk_queue_zone_size and bdev_zone_size functions were not following this convention so rename them. No functional change is introduced by this patch. Signed-off-by: Damien Le Moal Collapsed the two patches, they were nonsensically split and broke bisection. Signed-off-by: Jens Axboe --- block/blk-zoned.c | 4 ++-- block/partition-generic.c | 14 +++++++------- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 6 +++--- include/linux/blkdev.h | 6 +++--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 472211fa183a..3bd15d8095b1 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -16,7 +16,7 @@ static inline sector_t blk_zone_start(struct request_queue *q, sector_t sector) { - sector_t zone_mask = blk_queue_zone_size(q) - 1; + sector_t zone_mask = blk_queue_zone_sectors(q) - 1; return sector & ~zone_mask; } @@ -222,7 +222,7 @@ int blkdev_reset_zones(struct block_device *bdev, return -EINVAL; /* Check alignment (handle eventual smaller last zone) */ - zone_sectors = blk_queue_zone_size(q); + zone_sectors = blk_queue_zone_sectors(q); if (sector & (zone_sectors - 1)) return -EINVAL; diff --git a/block/partition-generic.c b/block/partition-generic.c index d7beb6bbbf66..7afb9907821f 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -434,7 +434,7 @@ static bool part_zone_aligned(struct gendisk *disk, struct block_device *bdev, sector_t from, sector_t size) { - unsigned int zone_size = bdev_zone_size(bdev); + unsigned int zone_sectors = bdev_zone_sectors(bdev); /* * If this function is called, then the disk is a zoned block device @@ -446,7 +446,7 @@ static bool part_zone_aligned(struct gendisk *disk, * regular block devices (no zone operation) and their zone size will * be reported as 0. Allow this case. */ - if (!zone_size) + if (!zone_sectors) return true; /* @@ -455,24 +455,24 @@ static bool part_zone_aligned(struct gendisk *disk, * use it. Check the zone size too: it should be a power of 2 number * of sectors. */ - if (WARN_ON_ONCE(!is_power_of_2(zone_size))) { + if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) { u32 rem; - div_u64_rem(from, zone_size, &rem); + div_u64_rem(from, zone_sectors, &rem); if (rem) return false; if ((from + size) < get_capacity(disk)) { - div_u64_rem(size, zone_size, &rem); + div_u64_rem(size, zone_sectors, &rem); if (rem) return false; } } else { - if (from & (zone_size - 1)) + if (from & (zone_sectors - 1)) return false; if ((from + size) < get_capacity(disk) && - (size & (zone_size - 1))) + (size & (zone_sectors - 1))) return false; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0738f48293cc..0d8802453758 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -713,8 +713,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } sector = SECTOR_FROM_BLOCK(blkstart); - if (sector & (bdev_zone_size(bdev) - 1) || - nr_sects != bdev_zone_size(bdev)) { + if (sector & (bdev_zone_sectors(bdev) - 1) || + nr_sects != bdev_zone_sectors(bdev)) { f2fs_msg(sbi->sb, KERN_INFO, "(%d) %s: Unaligned discard attempted (block %x + %x)", devi, sbi->s_ndevs ? FDEV(devi).path: "", diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 702638e21c76..46fd30d8af77 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1553,16 +1553,16 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != - SECTOR_TO_BLOCK(bdev_zone_size(bdev))) + SECTOR_TO_BLOCK(bdev_zone_sectors(bdev))) return -EINVAL; - sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev)); + sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)); if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz != __ilog2_u32(sbi->blocks_per_blkz)) return -EINVAL; sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz); FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >> sbi->log_blocks_per_blkz; - if (nr_sectors & (bdev_zone_size(bdev) - 1)) + if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 83695641bd5e..ff3d774f2751 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -739,7 +739,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } } -static inline unsigned int blk_queue_zone_size(struct request_queue *q) +static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) { return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } @@ -1536,12 +1536,12 @@ static inline bool bdev_is_zoned(struct block_device *bdev) return false; } -static inline unsigned int bdev_zone_size(struct block_device *bdev) +static inline unsigned int bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); if (q) - return blk_queue_zone_size(q); + return blk_queue_zone_sectors(q); return 0; } From 18a3ed59d09cf81a6447aadf6931bf0c9ffec5e0 Mon Sep 17 00:00:00 2001 From: Kazuya Mizuguchi Date: Thu, 12 Jan 2017 13:21:06 +0100 Subject: [PATCH 183/258] ravb: Remove Rx overflow log messages Remove Rx overflow log messages as in an environment where logging results in network traffic logging may cause further overflows. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Kazuya Mizuguchi [simon: reworked changelog] Signed-off-by: Simon Horman Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 92d7692c840d..5e5ad978eab9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -926,14 +926,10 @@ static int ravb_poll(struct napi_struct *napi, int budget) /* Receive error message handling */ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; - if (priv->rx_over_errors != ndev->stats.rx_over_errors) { + if (priv->rx_over_errors != ndev->stats.rx_over_errors) ndev->stats.rx_over_errors = priv->rx_over_errors; - netif_err(priv, rx_err, ndev, "Receive Descriptor Empty\n"); - } - if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) { + if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; - netif_err(priv, rx_err, ndev, "Receive FIFO Overflow\n"); - } out: return budget - quota; } From 4b09ec4b14a168bf2c687e1f598140c3c11e9222 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 5 Jan 2017 10:20:16 -0500 Subject: [PATCH 184/258] nfs: Don't take a reference on fl->fl_file for LOCK operation I have reports of a crash that look like __fput() was called twice for a NFSv4.0 file. It seems possible that the state manager could try to reclaim a lock and take a reference on the fl->fl_file at the same time the file is being released if, during the close(), a signal interrupts the wait for outstanding IO while removing locks which then skips the removal of that lock. Since 83bfff23e9ed ("nfs4: have do_vfs_lock take an inode pointer") has removed the need to traverse fl->fl_file->f_inode in nfs4_lock_done(), taking that reference is no longer necessary. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6dcbc5defb7a..700ed1fc1075 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -6127,7 +6126,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); return p; out_free_seqid: @@ -6240,7 +6238,6 @@ static void nfs4_lock_release(void *calldata) nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); - fput(data->fl.fl_file); kfree(data); dprintk("%s: done!\n", __func__); } From 41c066f2c4d436c535616fe182331766c57838f0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 14:54:53 +0000 Subject: [PATCH 185/258] arm64: assembler: make adr_l work in modules under KASLR When CONFIG_RANDOMIZE_MODULE_REGION_FULL=y, the offset between loaded modules and the core kernel may exceed 4 GB, putting symbols exported by the core kernel out of the reach of the ordinary adrp/add instruction pairs used to generate relative symbol references. So make the adr_l macro emit a movz/movk sequence instead when executing in module context. While at it, remove the pointless special case for the stack pointer. Acked-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 36 ++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 446f6c46d4b1..3a4301163e04 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -164,22 +164,25 @@ lr .req x30 // link register /* * Pseudo-ops for PC-relative adr/ldr/str , where - * is within the range +/- 4 GB of the PC. + * is within the range +/- 4 GB of the PC when running + * in core kernel context. In module context, a movz/movk sequence + * is used, since modules may be loaded far away from the kernel + * when KASLR is in effect. */ /* * @dst: destination register (64 bit wide) * @sym: name of the symbol - * @tmp: optional scratch register to be used if == sp, which - * is not allowed in an adrp instruction */ - .macro adr_l, dst, sym, tmp= - .ifb \tmp + .macro adr_l, dst, sym +#ifndef MODULE adrp \dst, \sym add \dst, \dst, :lo12:\sym - .else - adrp \tmp, \sym - add \dst, \tmp, :lo12:\sym - .endif +#else + movz \dst, #:abs_g3:\sym + movk \dst, #:abs_g2_nc:\sym + movk \dst, #:abs_g1_nc:\sym + movk \dst, #:abs_g0_nc:\sym +#endif .endm /* @@ -190,6 +193,7 @@ lr .req x30 // link register * the address */ .macro ldr_l, dst, sym, tmp= +#ifndef MODULE .ifb \tmp adrp \dst, \sym ldr \dst, [\dst, :lo12:\sym] @@ -197,6 +201,15 @@ lr .req x30 // link register adrp \tmp, \sym ldr \dst, [\tmp, :lo12:\sym] .endif +#else + .ifb \tmp + adr_l \dst, \sym + ldr \dst, [\dst] + .else + adr_l \tmp, \sym + ldr \dst, [\tmp] + .endif +#endif .endm /* @@ -206,8 +219,13 @@ lr .req x30 // link register * while needs to be preserved. */ .macro str_l, src, sym, tmp +#ifndef MODULE adrp \tmp, \sym str \src, [\tmp, :lo12:\sym] +#else + adr_l \tmp, \sym + str \src, [\tmp] +#endif .endm /* From cc8e8342930129aa2c9b629e1653e4681f0896ea Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 4 Jan 2017 16:21:58 +0800 Subject: [PATCH 186/258] ceph: fix mds cluster availability check We should apply the check after getting the initial mdsmap. Fixes: e9e427f0a14f ("ceph: check availability of mds cluster on mount") Link: http://tracker.ceph.com/issues/18161 Signed-off-by: Yan, Zheng --- fs/ceph/mds_client.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4f49253387a0..ec6b35e9f966 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2106,6 +2106,11 @@ static int __do_request(struct ceph_mds_client *mdsc, dout("do_request mdsmap err %d\n", err); goto finish; } + if (mdsc->mdsmap->m_epoch == 0) { + dout("do_request no mdsmap, waiting for map\n"); + list_add(&req->r_wait, &mdsc->waiting_for_map); + goto finish; + } if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { From 84fcc2d2bd6cbf621e49e1d0f7eaef2e3c666b40 Mon Sep 17 00:00:00 2001 From: "Geng, Jichao" Date: Thu, 5 Jan 2017 16:50:39 +0800 Subject: [PATCH 187/258] ceph: fix get_oldest_context() For no snapshot case, we should use ci->truncate_{seq,size}. Fixes: 5f743e456606 ("ceph: record truncate size/seq for snap data writeback") Signed-off-by: Geng, Jichao Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9cd0c0ea7cdb..e4b066cd912a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -502,9 +502,9 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode, dout(" head snapc %p has %d dirty pages\n", snapc, ci->i_wrbuffer_ref_head); if (truncate_size) - *truncate_size = capsnap->truncate_size; + *truncate_size = ci->i_truncate_size; if (truncate_seq) - *truncate_seq = capsnap->truncate_seq; + *truncate_seq = ci->i_truncate_seq; } spin_unlock(&ci->i_ceph_lock); return snapc; From 30f939feaeee23e21391cfc7b484f012eb189c3c Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Mon, 9 Jan 2017 22:53:36 +0700 Subject: [PATCH 188/258] i2c: fix kernel memory disclosure in dev interface i2c_smbus_xfer() does not always fill an entire block, allowing kernel stack memory disclosure through the temp variable. Clear it before it's read to. Signed-off-by: Vlad Tsyrklevich Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 66f323fd3982..6f638bbc922d 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -331,7 +331,7 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, unsigned long arg) { struct i2c_smbus_ioctl_data data_arg; - union i2c_smbus_data temp; + union i2c_smbus_data temp = {}; int datasize, res; if (copy_from_user(&data_arg, From 331c34255293cd02d395b7097008b509ba89e60e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 4 Jan 2017 20:57:22 -0800 Subject: [PATCH 189/258] i2c: do not enable fall back to Host Notify by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Falling back unconditionally to HostNotify as primary client's interrupt breaks some drivers which alter their functionality depending on whether interrupt is present or not, so let's introduce a board flag telling I2C core explicitly if we want wired interrupt or HostNotify-based one: I2C_CLIENT_HOST_NOTIFY. For DT-based systems we introduce "host-notify" property that we convert to I2C_CLIENT_HOST_NOTIFY board flag. Tested-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov Acked-by: Pali Rohár Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c.txt | 8 ++++++++ drivers/i2c/i2c-core.c | 17 ++++++++--------- include/linux/i2c.h | 1 + 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index 5fa691e6f638..cee9d5055fa2 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -62,6 +62,9 @@ wants to support one of the below features, it should adapt the bindings below. "irq" and "wakeup" names are recognized by I2C core, other names are left to individual drivers. +- host-notify + device uses SMBus host notify protocol instead of interrupt line. + - multi-master states that there is another master active on this bus. The OS can use this information to adapt power management to keep the arbitration awake @@ -81,6 +84,11 @@ Binding may contain optional "interrupts" property, describing interrupts used by the device. I2C core will assign "irq" interrupt (or the very first interrupt if not using interrupt names) as primary interrupt for the slave. +Alternatively, devices supporting SMbus Host Notify, and connected to +adapters that support this feature, may use "host-notify" property. I2C +core will create a virtual interrupt for Host Notify and assign it as +primary interrupt for the slave. + Also, if device is marked as a wakeup source, I2C core will set up "wakeup" interrupt for the device. If "wakeup" interrupt name is not present in the binding, then primary interrupt will be used as wakeup interrupt. diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index cf9e396d7702..7b117240f1ea 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -931,7 +931,10 @@ static int i2c_device_probe(struct device *dev) if (!client->irq) { int irq = -ENOENT; - if (dev->of_node) { + if (client->flags & I2C_CLIENT_HOST_NOTIFY) { + dev_dbg(dev, "Using Host Notify IRQ\n"); + irq = i2c_smbus_host_notify_to_irq(client); + } else if (dev->of_node) { irq = of_irq_get_byname(dev->of_node, "irq"); if (irq == -EINVAL || irq == -ENODATA) irq = of_irq_get(dev->of_node, 0); @@ -940,14 +943,7 @@ static int i2c_device_probe(struct device *dev) } if (irq == -EPROBE_DEFER) return irq; - /* - * ACPI and OF did not find any useful IRQ, try to see - * if Host Notify can be used. - */ - if (irq < 0) { - dev_dbg(dev, "Using Host Notify IRQ\n"); - irq = i2c_smbus_host_notify_to_irq(client); - } + if (irq < 0) irq = 0; @@ -1716,6 +1712,9 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap, info.of_node = of_node_get(node); info.archdata = &dev_ad; + if (of_property_read_bool(node, "host-notify")) + info.flags |= I2C_CLIENT_HOST_NOTIFY; + if (of_get_property(node, "wakeup-source", NULL)) info.flags |= I2C_CLIENT_WAKE; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b2109c522dec..4b45ec46161f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -665,6 +665,7 @@ i2c_unlock_adapter(struct i2c_adapter *adapter) #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ /* Must equal I2C_M_TEN below */ #define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ +#define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ From 6f724fb3039522486fce2e32e4c0fbe238a6ab02 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 6 Jan 2017 19:02:57 +0800 Subject: [PATCH 190/258] i2c: print correct device invalid address In of_i2c_register_device(), when the check for device address validity fails we print the info.addr, which has not been assigned properly. Fix this by printing the actual invalid address. Signed-off-by: John Garry Reviewed-by: Vladimir Zapolskiy Signed-off-by: Wolfram Sang Fixes: b4e2f6ac1281 ("i2c: apply DT flags when probing") Cc: stable@kernel.org --- drivers/i2c/i2c-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 7b117240f1ea..c26296c2eac5 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1704,7 +1704,7 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap, if (i2c_check_addr_validity(addr, info.flags)) { dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n", - info.addr, node->full_name); + addr, node->full_name); return ERR_PTR(-EINVAL); } From 2659161dd40dbb599a19f320164373093df44a89 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 29 Dec 2016 22:27:33 +0000 Subject: [PATCH 191/258] i2c: fix spelling mistake: "insufficent" -> "insufficient" Trivial fix to spelling mistake in WARN message, insufficient has an insufficient number of i's in the spelling. Signed-off-by: Colin Ian King Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index c26296c2eac5..583e95042a21 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -3632,7 +3632,7 @@ int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb) int ret; if (!client || !slave_cb) { - WARN(1, "insufficent data\n"); + WARN(1, "insufficient data\n"); return -EINVAL; } From 701dc207bf551d9fe6defa36e84a911e880398c3 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 11 Jan 2017 10:11:44 +0100 Subject: [PATCH 192/258] i2c: piix4: Avoid race conditions with IMC On AMD's SB800 and upwards, the SMBus is shared with the Integrated Micro Controller (IMC). The platform provides a hardware semaphore to avoid race conditions among them. (Check page 288 of the SB800-Series Southbridges Register Reference Guide http://support.amd.com/TechDocs/45482.pdf) Without this patch, many access to the SMBus end with an invalid transaction or even with the bus stalled. Reported-by: Alexandre Desnoyers Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: Andy Shevchenko : Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index c2268cdf38e8..e34d82e79b98 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -585,10 +585,29 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, u8 command, int size, union i2c_smbus_data *data) { struct i2c_piix4_adapdata *adapdata = i2c_get_adapdata(adap); + unsigned short piix4_smba = adapdata->smba; + int retries = MAX_TIMEOUT; + int smbslvcnt; u8 smba_en_lo; u8 port; int retval; + /* Request the SMBUS semaphore, avoid conflicts with the IMC */ + smbslvcnt = inb_p(SMBSLVCNT); + do { + outb_p(smbslvcnt | 0x10, SMBSLVCNT); + + /* Check the semaphore status */ + smbslvcnt = inb_p(SMBSLVCNT); + if (smbslvcnt & 0x10) + break; + + usleep_range(1000, 2000); + } while (--retries); + /* SMBus is still owned by the IMC, we give up */ + if (!retries) + return -EBUSY; + mutex_lock(&piix4_mutex_sb800); outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); @@ -606,6 +625,9 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, mutex_unlock(&piix4_mutex_sb800); + /* Release the semaphore */ + outb_p(smbslvcnt | 0x20, SMBSLVCNT); + return retval; } From ea7a80858f57d8878b1499ea0f1b8a635cc48de7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 11 Jan 2017 14:29:54 -0800 Subject: [PATCH 193/258] net: lwtunnel: Handle lwtunnel_fill_encap failure Handle failure in lwtunnel_fill_encap adding attributes to skb. Fixes: 571e722676fe ("ipv4: support for fib route lwtunnel encap attributes") Fixes: 19e42e451506 ("ipv6: support for fib route lwtunnel encap attributes") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 11 +++++++---- net/ipv6/route.c | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index eba1546b5031..9a375b908d01 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1279,8 +1279,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->nh_lwtstate) - lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); + if (fi->fib_nh->nh_lwtstate && + lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) + goto nla_put_failure; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { @@ -1316,8 +1317,10 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (nh->nh_lwtstate) - lwtunnel_fill_encap(skb, nh->nh_lwtstate); + if (nh->nh_lwtstate && + lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ce5aaf448c54..4f6b067c8753 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3317,7 +3317,8 @@ static int rt6_fill_node(struct net *net, if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) goto nla_put_failure; - lwtunnel_fill_encap(skb, rt->dst.lwtstate); + if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; From 994c5483e7f6dbf9fea622ba2031b9d868feb4b9 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 11 Jan 2017 16:45:51 -0600 Subject: [PATCH 194/258] net: qcom/emac: grab a reference to the phydev on ACPI systems Commit 6ffe1c4cd0a7 ("net: qcom/emac: fix of_node and phydev leaks") fixed the problem with reference leaks on phydev, but the fix is device-tree specific. When the driver unloads, the reference is dropped only on DT systems. Instead, it's cleaner if up grab an reference on ACPI systems. When the driver unloads, we can drop the reference without having to check whether we're on a DT system. Signed-off-by: Timur Tabi Reviewed-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-phy.c | 7 +++++++ drivers/net/ethernet/qualcomm/emac/emac.c | 6 ++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c index 99a14df28b96..2851b4c56570 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c @@ -201,6 +201,13 @@ int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) else adpt->phydev = mdiobus_get_phy(mii_bus, phy_addr); + /* of_phy_find_device() claims a reference to the phydev, + * so we do that here manually as well. When the driver + * later unloads, it can unilaterally drop the reference + * without worrying about ACPI vs DT. + */ + if (adpt->phydev) + get_device(&adpt->phydev->mdio.dev); } else { struct device_node *phy_np; diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 422289c232bc..f46d300bd585 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -719,8 +719,7 @@ static int emac_probe(struct platform_device *pdev) err_undo_napi: netif_napi_del(&adpt->rx_q.napi); err_undo_mdiobus: - if (!has_acpi_companion(&pdev->dev)) - put_device(&adpt->phydev->mdio.dev); + put_device(&adpt->phydev->mdio.dev); mdiobus_unregister(adpt->mii_bus); err_undo_clocks: emac_clks_teardown(adpt); @@ -740,8 +739,7 @@ static int emac_remove(struct platform_device *pdev) emac_clks_teardown(adpt); - if (!has_acpi_companion(&pdev->dev)) - put_device(&adpt->phydev->mdio.dev); + put_device(&adpt->phydev->mdio.dev); mdiobus_unregister(adpt->mii_bus); free_netdev(netdev); From 8a430ed50bb1b19ca14a46661f3b1b35f2fb5c39 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 11 Jan 2017 15:42:17 -0800 Subject: [PATCH 195/258] net: ipv4: fix table id in getroute response rtm_table is an 8-bit field while table ids are allowed up to u32. Commit 709772e6e065 ("net: Fix routing tables with id > 255 for legacy software") added the preference to set rtm_table in dumps to RT_TABLE_COMPAT if the table id is > 255. The table id returned on get route requests should do the same. Fixes: c36ba6603a11 ("net: Allow user to get table id from route lookup") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0fcac8e7a2b2..709ffe67d1de 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2472,7 +2472,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = table_id; + r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; From 2dfc61736482441993bfb7dfaa971113b53f107c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jan 2017 08:47:00 -0500 Subject: [PATCH 196/258] NFSv4: Call update_changeattr() from _nfs4_proc_open only if a file was created We don't want to invalidate the directory attribute and data cache unless we know that a file was created, or the change attribute differs from the one in our cache. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 700ed1fc1075..4010c33151ad 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2390,11 +2390,12 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); if (o_arg->open_flags & O_CREAT) { - update_changeattr(dir, &o_res->cinfo); if (o_arg->open_flags & O_EXCL) data->file_created = 1; else if (o_res->cinfo.before != o_res->cinfo.after) data->file_created = 1; + if (data->file_created || dir->i_version != o_res->cinfo.after) + update_changeattr(dir, &o_res->cinfo); } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; From c733c49c32624f927f443be6dbabb387006bbe42 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jan 2017 12:32:26 -0500 Subject: [PATCH 197/258] NFSv4: Don't apply change_info4 twice on rename within a directory If a file is renamed, but stays in the same directory, we will still receive 2 change_info4 structures describing the change to that directory, but we only want to apply it once. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4010c33151ad..1e797bf74aaf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4159,8 +4159,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) return 0; - update_changeattr(old_dir, &res->old_cinfo); - update_changeattr(new_dir, &res->new_cinfo); + if (task->tk_status == 0) { + update_changeattr(old_dir, &res->old_cinfo); + if (new_dir != old_dir) + update_changeattr(new_dir, &res->new_cinfo); + } return 1; } From c40d52fe1c2ba25dcb8cd207c8d26ef5f57f0476 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jan 2017 12:36:11 -0500 Subject: [PATCH 198/258] NFSv4: Don't call update_changeattr() unless the unlink is successful If the unlink wasn't successful, then the directory has presumably not changed. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1e797bf74aaf..6a35204affa4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4125,7 +4125,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) return 0; - update_changeattr(dir, &res->cinfo); + if (task->tk_status == 0) + update_changeattr(dir, &res->cinfo); return 1; } From d3129ef672cac81c4d0185336af377c8dc1091d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jan 2017 22:07:28 -0500 Subject: [PATCH 199/258] NFSv4: update_changeattr should update the attribute timestamp Otherwise, the attribute cache remains marked as being expired. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6a35204affa4..ecc151697fd4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1082,7 +1082,8 @@ int nfs4_call_sync(struct rpc_clnt *clnt, return nfs4_call_sync_sequence(clnt, server, msg, args, res); } -static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) +static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, + unsigned long timestamp) { struct nfs_inode *nfsi = NFS_I(dir); @@ -1098,6 +1099,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) NFS_INO_INVALID_ACL; } dir->i_version = cinfo->after; + nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); @@ -2395,7 +2397,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) else if (o_res->cinfo.before != o_res->cinfo.after) data->file_created = 1; if (data->file_created || dir->i_version != o_res->cinfo.after) - update_changeattr(dir, &o_res->cinfo); + update_changeattr(dir, &o_res->cinfo, + o_res->f_attr->time_start); } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; @@ -4073,11 +4076,12 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name) .rpc_argp = &args, .rpc_resp = &res, }; + unsigned long timestamp = jiffies; int status; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); if (status == 0) - update_changeattr(dir, &res.cinfo); + update_changeattr(dir, &res.cinfo, timestamp); return status; } @@ -4126,7 +4130,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) &data->timeout) == -EAGAIN) return 0; if (task->tk_status == 0) - update_changeattr(dir, &res->cinfo); + update_changeattr(dir, &res->cinfo, res->dir_attr->time_start); return 1; } @@ -4161,9 +4165,9 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 0; if (task->tk_status == 0) { - update_changeattr(old_dir, &res->old_cinfo); + update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start); if (new_dir != old_dir) - update_changeattr(new_dir, &res->new_cinfo); + update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start); } return 1; } @@ -4201,7 +4205,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { - update_changeattr(dir, &res.cinfo); + update_changeattr(dir, &res.cinfo, res.fattr->time_start); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) nfs_setsecurity(inode, res.fattr, res.label); @@ -4276,7 +4280,8 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { - update_changeattr(dir, &data->res.dir_cinfo); + update_changeattr(dir, &data->res.dir_cinfo, + data->res.fattr->time_start); status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); } return status; From dcd208697707b12adeaa45643bab239c5e90ef9b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 11 Jan 2017 20:34:50 -0500 Subject: [PATCH 200/258] nfsd: fix supported attributes for acl & labels Oops--in 916d2d844afd I moved some constants into an array for convenience, but here I'm accidentally writing to that array. The effect is that if you ever encounter a filesystem lacking support for ACLs or security labels, then all queries of supported attributes will report that attribute as unsupported from then on. Fixes: 916d2d844afd "nfsd: clean up supported attribute handling" Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7ecf16be4a44..8fae53ce21d1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2440,7 +2440,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - u32 *supp = nfsd_suppattrs[minorversion]; + u32 supp[3]; + + memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp)); if (!IS_POSIXACL(dentry->d_inode)) supp[0] &= ~FATTR4_WORD0_ACL; From 78794d1890708cf94e3961261e52dcec2cc34722 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 9 Jan 2017 17:15:18 -0500 Subject: [PATCH 201/258] svcrpc: don't leak contexts on PROC_DESTROY Context expiry times are in units of seconds since boot, not unix time. The use of get_seconds() here therefore sets the expiry time decades in the future. This prevents timely freeing of contexts destroyed by client RPC_GSS_PROC_DESTROY requests. We'd still free them eventually (when the module is unloaded or the container shut down), but a lot of contexts could pile up before then. Cc: stable@vger.kernel.org Fixes: c5b29f885afe "sunrpc: use seconds since boot in expiry cache" Reported-by: Andy Adamson Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 886e9d381771..153082598522 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1489,7 +1489,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; - rsci->h.expiry_time = get_seconds(); + rsci->h.expiry_time = seconds_since_boot(); set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; From 546125d1614264d26080817d0c8cddb9b25081fa Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 5 Jan 2017 16:34:51 -0500 Subject: [PATCH 202/258] sunrpc: don't call sleeping functions from the notifier block callbacks The inet6addr_chain is an atomic notifier chain, so we can't call anything that might sleep (like lock_sock)... instead of closing the socket from svc_age_temp_xprts_now (which is called by the notifier function), just have the rpc service threads do it instead. Cc: stable@vger.kernel.org Fixes: c3d4879e01be "sunrpc: Add a function to close..." Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index e5d193440374..7440290f64ac 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -66,6 +66,7 @@ struct svc_xprt { #define XPT_LISTENER 10 /* listening endpoint */ #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ +#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3bc1d61694cb..9c9db55a0c1e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -799,6 +799,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); + if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ goto out; @@ -1020,9 +1022,11 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) le = to_be_closed.next; list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - xprt->xpt_ops->xpo_kill_temp_xprt(xprt); - svc_close_xprt(xprt); + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_KILL_TEMP, &xprt->xpt_flags); + dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n", + xprt); + svc_xprt_enqueue(xprt); } } EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now); From 05a974efa4bdf6e2a150e3f27dc6fcf0a9ad5655 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Thu, 15 Dec 2016 18:40:14 +0100 Subject: [PATCH 203/258] ieee802154: atusb: do not use the stack for buffers to make them DMA able From 4.9 we should really avoid using the stack here as this will not be DMA able on various platforms. This changes the buffers already being present in time of 4.9 being released. This should go into stable as well. Reported-by: Dan Carpenter Cc: stable@vger.kernel.org Signed-off-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/atusb.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 1253f864737a..fa3e8c34b26c 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -117,13 +117,26 @@ static int atusb_read_reg(struct atusb *atusb, uint8_t reg) { struct usb_device *usb_dev = atusb->usb_dev; int ret; + uint8_t *buffer; uint8_t value; + buffer = kmalloc(1, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + dev_dbg(&usb_dev->dev, "atusb: reg = 0x%x\n", reg); ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, - 0, reg, &value, 1, 1000); - return ret >= 0 ? value : ret; + 0, reg, buffer, 1, 1000); + + if (ret >= 0) { + value = buffer[0]; + kfree(buffer); + return value; + } else { + kfree(buffer); + return ret; + } } static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask, @@ -608,9 +621,13 @@ static const struct ieee802154_ops atusb_ops = { static int atusb_get_and_show_revision(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - unsigned char buffer[3]; + unsigned char *buffer; int ret; + buffer = kmalloc(3, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + /* Get a couple of the ATMega Firmware values */ ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_ID, ATUSB_REQ_FROM_DEV, 0, 0, @@ -631,15 +648,20 @@ static int atusb_get_and_show_revision(struct atusb *atusb) dev_info(&usb_dev->dev, "Please update to version 0.2 or newer"); } + kfree(buffer); return ret; } static int atusb_get_and_show_build(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - char build[ATUSB_BUILD_SIZE + 1]; + char *build; int ret; + build = kmalloc(ATUSB_BUILD_SIZE + 1, GFP_KERNEL); + if (!build) + return -ENOMEM; + ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000); @@ -648,6 +670,7 @@ static int atusb_get_and_show_build(struct atusb *atusb) dev_info(&usb_dev->dev, "Firmware: build %s\n", build); } + kfree(build); return ret; } From 2fd2b550a5ed13b1d6640ff77630fc369636a544 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Thu, 15 Dec 2016 18:40:15 +0100 Subject: [PATCH 204/258] ieee802154: atusb: make sure we set a randaom extended address if fetching fails In the unlikely case were the firmware is new enough but the actual USB command still fails make sure we set a random address and return. Signed-off-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/atusb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index fa3e8c34b26c..67790f88908d 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -737,8 +737,11 @@ static int atusb_set_extended_addr(struct atusb *atusb) ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_EUI64_READ, ATUSB_REQ_FROM_DEV, 0, 0, buffer, IEEE802154_EXTENDED_ADDR_LEN, 1000); - if (ret < 0) - dev_err(&usb_dev->dev, "failed to fetch extended address\n"); + if (ret < 0) { + dev_err(&usb_dev->dev, "failed to fetch extended address, random address set\n"); + ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr); + return ret; + } memcpy(&extended_addr, buffer, IEEE802154_EXTENDED_ADDR_LEN); /* Check if read address is not empty and the unicast bit is set correctly */ From 5eb35a6ccea61648a55713c076ab65423eea1ac0 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Thu, 15 Dec 2016 18:40:16 +0100 Subject: [PATCH 205/258] ieee802154: atusb: do not use the stack for address fetching to make it DMA able From 4.9 we should really avoid using the stack here as this will not be DMA able on various platforms. This changes a buffer that was introduced in the 4.10 merge window. Fixes: 6cc33eba232c ("ieee802154: atusb: try to read permanent extended address from device") Reported-by: Dan Carpenter Signed-off-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/atusb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 67790f88908d..63cb67917a07 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -721,7 +721,7 @@ fail: static int atusb_set_extended_addr(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - unsigned char buffer[IEEE802154_EXTENDED_ADDR_LEN]; + unsigned char *buffer; __le64 extended_addr; u64 addr; int ret; @@ -733,6 +733,10 @@ static int atusb_set_extended_addr(struct atusb *atusb) return 0; } + buffer = kmalloc(IEEE802154_EXTENDED_ADDR_LEN, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + /* Firmware is new enough so we fetch the address from EEPROM */ ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_EUI64_READ, ATUSB_REQ_FROM_DEV, 0, 0, @@ -740,6 +744,7 @@ static int atusb_set_extended_addr(struct atusb *atusb) if (ret < 0) { dev_err(&usb_dev->dev, "failed to fetch extended address, random address set\n"); ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr); + kfree(buffer); return ret; } @@ -755,6 +760,7 @@ static int atusb_set_extended_addr(struct atusb *atusb) &addr); } + kfree(buffer); return ret; } From f301606934b240fb54d8edf3618a0483e36046fc Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Sun, 18 Dec 2016 15:25:33 -0800 Subject: [PATCH 206/258] at86rf230: Allow slow GPIO pins for "rstn" Driver code never touches "rstn" signal in atomic context, so there's no need to implicitly put such restriction on it by using gpio_set_value to manipulate it. Replace gpio_set_value to gpio_set_value_cansleep to fix that. As a an example of where such restriction might be inconvenient, consider a hardware design where "rstn" is connected to a pin of I2C/SPI GPIO expander chip. Cc: Chris Healy Signed-off-by: Andrey Smirnov Signed-off-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 46d53a6c8cf8..76ba7ecfe142 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1715,9 +1715,9 @@ static int at86rf230_probe(struct spi_device *spi) /* Reset */ if (gpio_is_valid(rstn)) { udelay(1); - gpio_set_value(rstn, 0); + gpio_set_value_cansleep(rstn, 0); udelay(1); - gpio_set_value(rstn, 1); + gpio_set_value_cansleep(rstn, 1); usleep_range(120, 240); } From 8e38b7d4d71479b23b77f01cf0e5071610b8f357 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Mon, 2 Jan 2017 16:58:13 +0100 Subject: [PATCH 207/258] ieee802154: atusb: fix driver to work with older firmware versions After the addition of the frame_retries callback we could run into cases where a ATUSB device with an older firmware version would now longer be able to bring the interface up. We keep this functionality disabled now if the minimum firmware version for this feature is not available. Fixes: 5d82288b93db3bc ("ieee802154: atusb: implement .set_frame_retries ops callback") Reported-by: Alexander Aring Acked-by: Alexander Aring Signed-off-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/atusb.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 63cb67917a07..ef688518ad77 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -562,13 +562,6 @@ static int atusb_set_frame_retries(struct ieee802154_hw *hw, s8 retries) { struct atusb *atusb = hw->priv; - struct device *dev = &atusb->usb_dev->dev; - - if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 3) { - dev_info(dev, "Automatic frame retransmission is only available from " - "firmware version 0.3. Please update if you want this feature."); - return -EINVAL; - } return atusb_write_subreg(atusb, SR_MAX_FRAME_RETRIES, retries); } @@ -802,8 +795,7 @@ static int atusb_probe(struct usb_interface *interface, hw->parent = &usb_dev->dev; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | - IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS | - IEEE802154_HW_FRAME_RETRIES; + IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS; hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | WPAN_PHY_FLAG_CCA_MODE; @@ -832,6 +824,9 @@ static int atusb_probe(struct usb_interface *interface, atusb_get_and_show_build(atusb); atusb_set_extended_addr(atusb); + if (atusb->fw_ver_maj >= 0 && atusb->fw_ver_min >= 3) + hw->flags |= IEEE802154_HW_FRAME_RETRIES; + ret = atusb_get_and_clear_error(atusb); if (ret) { dev_err(&atusb->usb_dev->dev, From ce1ca7d2d140a1f4aaffd297ac487f246963dd2f Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Mon, 9 Jan 2017 16:00:44 +0530 Subject: [PATCH 208/258] svcrdma: avoid duplicate dma unmapping during error recovery In rdma_read_chunk_frmr() when ib_post_send() fails, the error code path invokes ib_dma_unmap_sg() to unmap the sg list. It then invokes svc_rdma_put_frmr() which in turn tries to unmap the same sg list through ib_dma_unmap_sg() again. This second unmap is invalid and could lead to problems when the iova being unmapped is subsequently reused. Remove the call to unmap in rdma_read_chunk_frmr() and let svc_rdma_put_frmr() handle it. Fixes: 412a15c0fe53 ("svcrdma: Port to new memory registration API") Cc: stable@vger.kernel.org Signed-off-by: Sriharsha Basavapatna Reviewed-by: Chuck Lever Reviewed-by: Yuval Shaia Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 57d35fbb1c28..172b537f8cfc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -347,8 +347,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - ib_dma_unmap_sg(xprt->sc_cm_id->device, - frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; From 7aa4865506a26c607e00bd9794a85785b55ebca7 Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Thu, 12 Jan 2017 07:28:06 -0800 Subject: [PATCH 209/258] net: thunderx: acpi: fix LMAC initialization While probing BGX we requesting appropriate QLM for it's configuration and get LMAC count by that request. Then, while reading configured MAC values from SSDT table we need to save them in proper mapping: BGX[i]->lmac[j].mac = to later provide for initialization stuff. In order to fill such mapping properly we need to add lmac index to be used while acpi initialization since at this moment bgx->lmac_count already contains actual value. Signed-off-by: Vadim Lomovtsev Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 9211c750e064..2f85b64f01fa 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -47,8 +47,9 @@ struct lmac { struct bgx { u8 bgx_id; struct lmac lmac[MAX_LMAC_PER_BGX]; - int lmac_count; + u8 lmac_count; u8 max_lmac; + u8 acpi_lmac_idx; void __iomem *reg_base; struct pci_dev *pdev; bool is_dlm; @@ -1143,13 +1144,13 @@ static acpi_status bgx_acpi_register_phy(acpi_handle handle, if (acpi_bus_get_device(handle, &adev)) goto out; - acpi_get_mac_address(dev, adev, bgx->lmac[bgx->lmac_count].mac); + acpi_get_mac_address(dev, adev, bgx->lmac[bgx->acpi_lmac_idx].mac); - SET_NETDEV_DEV(&bgx->lmac[bgx->lmac_count].netdev, dev); + SET_NETDEV_DEV(&bgx->lmac[bgx->acpi_lmac_idx].netdev, dev); - bgx->lmac[bgx->lmac_count].lmacid = bgx->lmac_count; + bgx->lmac[bgx->acpi_lmac_idx].lmacid = bgx->acpi_lmac_idx; + bgx->acpi_lmac_idx++; /* move to next LMAC */ out: - bgx->lmac_count++; return AE_OK; } From d1b333d12cde9cabe898160b6be9769d3382d81c Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 12 Jan 2017 16:52:03 +0800 Subject: [PATCH 210/258] vfio iommu type1: fix the testing of capability for remote task Before the mdev enhancement type1 iommu used capable() to test the capability of current task; in the course of mdev development a new requirement, testing for another task other than current, was raised. ns_capable() was used for this purpose, however it still tests current, the only difference is, in a specified namespace. Fix it by using has_capability() instead, which tests the cap for specified task in init_user_ns, the same namespace as capable(). Cc: Gerd Hoffmann Signed-off-by: Jike Song Reviewed-by: James Morris Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 9266271a787a..77373e51b283 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -495,8 +495,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, unsigned long *pfn_base, bool do_accounting) { unsigned long limit; - bool lock_cap = ns_capable(task_active_pid_ns(dma->task)->user_ns, - CAP_IPC_LOCK); + bool lock_cap = has_capability(dma->task, CAP_IPC_LOCK); struct mm_struct *mm; int ret; bool rsvd; From 3139dc8ded6f27552a248d23fe9f086e3027fa12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 11 Jan 2017 15:39:31 +0100 Subject: [PATCH 211/258] dmaengine: rcar-dmac: unmap slave resource when channel is freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The slave mapping should be removed together with other channel resources when the channel is freed. If it's not unmapped it will hang around forever after the channel is freed. Fixes: 9f878603dbdb7db3 ("dmaengine: rcar-dmac: add iommu support for slave transfers") Reported-by: Laurent Pinchart Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2e441d0ccd79..4c357d475465 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -986,6 +986,7 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan) { struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); struct rcar_dmac *dmac = to_rcar_dmac(chan->device); + struct rcar_dmac_chan_map *map = &rchan->map; struct rcar_dmac_desc_page *page, *_page; struct rcar_dmac_desc *desc; LIST_HEAD(list); @@ -1019,6 +1020,13 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan) free_page((unsigned long)page); } + /* Remove slave mapping if present. */ + if (map->slave.xfer_size) { + dma_unmap_resource(chan->device->dev, map->addr, + map->slave.xfer_size, map->dir, 0); + map->slave.xfer_size = 0; + } + pm_runtime_put(chan->device->dev); } From 43071d8fb3b7f589d72663c496a6880fb097533c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 13 Jan 2017 11:28:25 +0100 Subject: [PATCH 212/258] mac80211: initialize SMPS field in HT capabilities ibss and mesh modes copy the ht capabilites from the band without overriding the SMPS state. Unfortunately the default value 0 for the SMPS field means static SMPS instead of disabled. This results in HT ibss and mesh setups using only single-stream rates, even though SMPS is not supposed to be active. Initialize SMPS to disabled for all bands on ieee80211_hw_register to ensure that the value is sane where it is not overriden with the real SMPS state. Reported-by: Elektra Wagenrad Signed-off-by: Felix Fietkau [move VHT TODO comment to a better place] Signed-off-by: Johannes Berg --- net/mac80211/main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1822c77f2b1c..56fb47953b72 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -913,12 +913,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; - if (sband->ht_cap.ht_supported) - local->rx_chains = - max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs), - local->rx_chains); + if (!sband->ht_cap.ht_supported) + continue; /* TODO: consider VHT for RX chains, hopefully it's the same */ + local->rx_chains = + max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs), + local->rx_chains); + + /* no need to mask, SM_PS_DISABLED has all bits set */ + sband->ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; } /* if low-level driver supports AP, we also support VLAN */ From dbef53621116474bb883f76f0ba6b7640bc42332 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 13 Jan 2017 13:32:51 +0100 Subject: [PATCH 213/258] mac80211: prevent skb/txq mismatch Station structure is considered as not uploaded (to driver) until drv_sta_state() finishes. This call is however done after the structure is attached to mac80211 internal lists and hashes. This means mac80211 can lookup (and use) station structure before it is uploaded to a driver. If this happens (structure exists, but sta->uploaded is false) fast_tx path can still be taken. Deep in the fastpath call the sta->uploaded is checked against to derive "pubsta" argument for ieee80211_get_txq(). If sta->uploaded is false (and sta is actually non-NULL) ieee80211_get_txq() effectively downgraded to vif->txq. At first glance this may look innocent but coerces mac80211 into a state that is almost guaranteed (codel may drop offending skb) to crash because a station-oriented skb gets queued up on vif-oriented txq. The ieee80211_tx_dequeue() ends up looking at info->control.flags and tries to use txq->sta which in the fail case is NULL. It's probably pointless to pretend one can downgrade skb from sta-txq to vif-txq. Since downgrading unicast traffic to vif->txq must not be done there's no txq to put a frame on if sta->uploaded is false. Therefore the code is made to fall back to regular tx() op path if the described condition is hit. Only drivers using wake_tx_queue were affected. Example crash dump before fix: Unable to handle kernel paging request at virtual address ffffe26c PC is at ieee80211_tx_dequeue+0x204/0x690 [mac80211] [] (ieee80211_tx_dequeue [mac80211]) from [] (ath10k_mac_tx_push_txq+0x54/0x1c0 [ath10k_core]) [] (ath10k_mac_tx_push_txq [ath10k_core]) from [] (ath10k_htt_txrx_compl_task+0xd78/0x11d0 [ath10k_core]) [] (ath10k_htt_txrx_compl_task [ath10k_core]) [] (ath10k_pci_napi_poll+0x54/0xe8 [ath10k_pci]) [] (ath10k_pci_napi_poll [ath10k_pci]) from [] (net_rx_action+0xac/0x160) Reported-by: Mohammed Shafi Shajakhan Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0d8b716e509e..797e847cbc49 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1243,7 +1243,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, + struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -1257,10 +1257,13 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, if (!ieee80211_is_data(hdr->frame_control)) return NULL; - if (pubsta) { + if (sta) { u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - txq = pubsta->txq[tid]; + if (!sta->uploaded) + return NULL; + + txq = sta->sta.txq[tid]; } else if (vif) { txq = vif->txq; } @@ -1503,23 +1506,17 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, struct fq *fq = &local->fq; struct ieee80211_vif *vif; struct txq_info *txqi; - struct ieee80211_sta *pubsta; if (!local->ops->wake_tx_queue || sdata->vif.type == NL80211_IFTYPE_MONITOR) return false; - if (sta && sta->uploaded) - pubsta = &sta->sta; - else - pubsta = NULL; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); vif = &sdata->vif; - txqi = ieee80211_get_txq(local, vif, pubsta, skb); + txqi = ieee80211_get_txq(local, vif, sta, skb); if (!txqi) return false; From 94a6fa899d2cb5ee76933406df32996576a562e4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 12 Jan 2017 08:24:16 -0700 Subject: [PATCH 214/258] vfio/type1: Remove pid_namespace.h include Using has_capability() rather than ns_capable(), we're no longer using this header. Cc: Jike Song Cc: Kirti Wankhede Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 77373e51b283..b3cc33fa6d26 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include From 148d3d021cf9724fcf189ce4e525a094bbf5ce89 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 12 Jan 2017 12:09:09 -0800 Subject: [PATCH 215/258] net: systemport: Decouple flow control from __bcm_sysport_tx_reclaim The __bcm_sysport_tx_reclaim() function is used to reclaim transmit resources in different places within the driver. Most of them should not affect the state of the transit flow control. Introduce bcm_sysport_tx_clean() which cleans the ring, but does not re-enable flow control towards the networking stack, and make bcm_sysport_tx_reclaim() do the actual transmit queue flow control. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 25 ++++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 7e8cf213fd81..744ed6ddaf37 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -710,11 +710,8 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs; unsigned int pkts_compl = 0, bytes_compl = 0; struct bcm_sysport_cb *cb; - struct netdev_queue *txq; u32 hw_ind; - txq = netdev_get_tx_queue(ndev, ring->index); - /* Compute how many descriptors have been processed since last call */ hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index)); c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK; @@ -745,9 +742,6 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, ring->c_index = c_index; - if (netif_tx_queue_stopped(txq) && pkts_compl) - netif_tx_wake_queue(txq); - netif_dbg(priv, tx_done, ndev, "ring=%d c_index=%d pkts_compl=%d, bytes_compl=%d\n", ring->index, ring->c_index, pkts_compl, bytes_compl); @@ -759,16 +753,33 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, static unsigned int bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, struct bcm_sysport_tx_ring *ring) { + struct netdev_queue *txq; unsigned int released; unsigned long flags; + txq = netdev_get_tx_queue(priv->netdev, ring->index); + spin_lock_irqsave(&ring->lock, flags); released = __bcm_sysport_tx_reclaim(priv, ring); + if (released) + netif_tx_wake_queue(txq); + spin_unlock_irqrestore(&ring->lock, flags); return released; } +/* Locked version of the per-ring TX reclaim, but does not wake the queue */ +static void bcm_sysport_tx_clean(struct bcm_sysport_priv *priv, + struct bcm_sysport_tx_ring *ring) +{ + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + __bcm_sysport_tx_reclaim(priv, ring); + spin_unlock_irqrestore(&ring->lock, flags); +} + static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget) { struct bcm_sysport_tx_ring *ring = @@ -1252,7 +1263,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv, napi_disable(&ring->napi); netif_napi_del(&ring->napi); - bcm_sysport_tx_reclaim(priv, ring); + bcm_sysport_tx_clean(priv, ring); kfree(ring->cbs); ring->cbs = NULL; From fa79581ea66ca43d56ef065346ac5be767fcb418 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Thu, 12 Jan 2017 21:30:01 +0100 Subject: [PATCH 216/258] ipv6: sr: fix several BUGs when preemption is enabled When CONFIG_PREEMPT=y, CONFIG_IPV6=m and CONFIG_SEG6_HMAC=y, seg6_hmac_init() is called during the initialization of the ipv6 module. This causes a subsequent call to smp_processor_id() with preemption enabled, resulting in the following trace. [ 20.451460] BUG: using smp_processor_id() in preemptible [00000000] code: systemd/1 [ 20.452556] caller is debug_smp_processor_id+0x17/0x19 [ 20.453304] CPU: 0 PID: 1 Comm: systemd Not tainted 4.9.0-rc5-00973-g46738b1 #1 [ 20.454406] ffffc9000062fc18 ffffffff813607b2 0000000000000000 ffffffff81a7f782 [ 20.455528] ffffc9000062fc48 ffffffff813778dc 0000000000000000 00000000001dcf98 [ 20.456539] ffffffffa003bd08 ffffffff81af93e0 ffffc9000062fc58 ffffffff81377905 [ 20.456539] Call Trace: [ 20.456539] [] dump_stack+0x63/0x7f [ 20.456539] [] check_preemption_disabled+0xd1/0xe3 [ 20.456539] [] debug_smp_processor_id+0x17/0x19 [ 20.460260] [] seg6_hmac_init+0xfa/0x192 [ipv6] [ 20.460260] [] seg6_init+0x39/0x6f [ipv6] [ 20.460260] [] inet6_init+0x21a/0x321 [ipv6] [ 20.460260] [] ? 0xffffffffa0061000 [ 20.460260] [] do_one_initcall+0x8b/0x115 [ 20.460260] [] do_init_module+0x53/0x1c4 [ 20.460260] [] load_module+0x1153/0x14ec [ 20.460260] [] SYSC_finit_module+0x8c/0xb9 [ 20.460260] [] ? SYSC_finit_module+0x8c/0xb9 [ 20.460260] [] SyS_finit_module+0x9/0xb [ 20.460260] [] do_syscall_64+0x62/0x75 [ 20.460260] [] entry_SYSCALL64_slow_path+0x25/0x25 Moreover, dst_cache_* functions also call smp_processor_id(), generating a similar trace. This patch uses raw_cpu_ptr() in seg6_hmac_init() rather than this_cpu_ptr() and disable preemption when using dst_cache_* functions. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- net/ipv6/seg6_hmac.c | 2 +- net/ipv6/seg6_iptunnel.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index ef1c8a46e7ac..03a064803626 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -400,7 +400,7 @@ static int seg6_hmac_init_algo(void) *p_tfm = tfm; } - p_tfm = this_cpu_ptr(algo->tfms); + p_tfm = raw_cpu_ptr(algo->tfms); tfm = *p_tfm; shsize = sizeof(*shash) + crypto_shash_descsize(tfm); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index bbfca22c34ae..1d60cb132835 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -265,7 +265,9 @@ int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); #ifdef CONFIG_DST_CACHE + preempt_disable(); dst = dst_cache_get(&slwt->cache); + preempt_enable(); #endif if (unlikely(!dst)) { @@ -286,7 +288,9 @@ int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } #ifdef CONFIG_DST_CACHE + preempt_disable(); dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + preempt_enable(); #endif } From 003c941057eaa868ca6fedd29a274c863167230d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 14:24:58 -0800 Subject: [PATCH 217/258] tcp: fix tcp_fastopen unaligned access complaints on sparc Fix up a data alignment issue on sparc by swapping the order of the cookie byte array field with the length field in struct tcp_fastopen_cookie, and making it a proper union to clean up the typecasting. This addresses log complaints like these: log_unaligned: 113 callbacks suppressed Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Kernel unaligned access at TPC[9764ac] tcp_try_fastopen+0x2ec/0x360 Kernel unaligned access at TPC[9764c8] tcp_try_fastopen+0x308/0x360 Kernel unaligned access at TPC[9764e4] tcp_try_fastopen+0x324/0x360 Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Cc: Eric Dumazet Signed-off-by: Shannon Nelson Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- net/ipv4/tcp_fastopen.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fc5848dad7a4..c93f4b3a59cb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { + union { + u8 val[TCP_FASTOPEN_COOKIE_MAX]; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr addr; +#endif + }; s8 len; - u8 val[TCP_FASTOPEN_COOKIE_MAX]; bool exp; /* In RFC6994 experimental option format */ }; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4e777a3243f9..f51919535ca7 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, struct tcp_fastopen_cookie tmp; if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { - struct in6_addr *buf = (struct in6_addr *) tmp.val; + struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) From c6180a6237174f481dc856ed6e890d8196b6f0fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Jan 2017 13:31:32 -0500 Subject: [PATCH 218/258] NFSv4: Fix client recovery when server reboots multiple times If the server reboots multiple times, the client should rely on the server to tell it that it cannot reclaim state as per section 9.6.3.4 in RFC7530 and section 8.4.2.1 in RFC5661. Currently, the client is being to conservative, and is assuming that if the server reboots while state recovery is in progress, then it must ignore state that was not recovered before the reboot. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1d152f4470cd..90e6193ce6be 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1729,7 +1729,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) break; case -NFS4ERR_STALE_CLIENTID: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_EXPIRED: From 2e3258ecfaebace1ceffaa14e0ea94775d54f46f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jan 2017 12:29:10 +0100 Subject: [PATCH 219/258] block: add blk_rq_payload_bytes Add a helper to calculate the actual data transfer size for special payload requests. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ff3d774f2751..1ca8e8fd1078 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1000,6 +1000,19 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +/* + * Some commands like WRITE SAME have a payload or data transfer size which + * is different from the size of the request. Any driver that supports such + * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to + * calculate the data transfer size. + */ +static inline unsigned int blk_rq_payload_bytes(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return rq->special_vec.bv_len; + return blk_rq_bytes(rq); +} + static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { From fd102b125e174edbea34e6e7a2d371bc7901c53d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jan 2017 12:29:11 +0100 Subject: [PATCH 220/258] scsi: use blk_rq_payload_bytes Without that we'll pass a wrong payload size in cmd->sdb, which can lead to hangs with drivers that need the total transfer size. Signed-off-by: Christoph Hellwig Reported-by: Chris Valean Reported-by: Dexuan Cui Fixes: f9d03f96 ("block: improve handling of the magic discard payload") Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index c35b6de4ca64..ad4ff8fcd4dd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1018,7 +1018,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) count = blk_rq_map_sg(req->q, req, sdb->table.sgl); BUG_ON(count > sdb->table.nents); sdb->table.nents = count; - sdb->length = blk_rq_bytes(req); + sdb->length = blk_rq_payload_bytes(req); return BLKPREP_OK; } From b131c61d62266eb21b0f125f63f3d07e5670d726 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jan 2017 12:29:12 +0100 Subject: [PATCH 221/258] nvme: use blk_rq_payload_bytes The new blk_rq_payload_bytes generalizes the payload length hacks that nvme_map_len did before. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 5 ++--- drivers/nvme/host/nvme.h | 8 -------- drivers/nvme/host/pci.c | 19 ++++++++----------- drivers/nvme/host/rdma.c | 13 +++++-------- 4 files changed, 15 insertions(+), 30 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index aa0bc60810a7..fcc9dcfdf675 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1654,13 +1654,12 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, struct nvme_fc_fcp_op *op) { struct nvmefc_fcp_req *freq = &op->fcp_req; - u32 map_len = nvme_map_len(rq); enum dma_data_direction dir; int ret; freq->sg_cnt = 0; - if (!map_len) + if (!blk_rq_payload_bytes(rq)) return 0; freq->sg_table.sgl = freq->first_sgl; @@ -1854,7 +1853,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; - data_len = nvme_map_len(rq); + data_len = blk_rq_payload_bytes(rq); if (data_len) io_dir = ((rq_data_dir(rq) == WRITE) ? NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6377e14586dc..aead6d08ed2c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -225,14 +225,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -static inline unsigned nvme_map_len(struct request *rq) -{ - if (req_op(rq) == REQ_OP_DISCARD) - return sizeof(struct nvme_dsm_range); - else - return blk_rq_bytes(rq); -} - static inline void nvme_cleanup_cmd(struct request *req) { if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 19beeb7b2ac2..3faefabf339c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -306,11 +306,11 @@ static __le64 **iod_list(struct request *req) return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); } -static int nvme_init_iod(struct request *rq, unsigned size, - struct nvme_dev *dev) +static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); int nseg = blk_rq_nr_phys_segments(rq); + unsigned int size = blk_rq_payload_bytes(rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); @@ -420,12 +420,11 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) } #endif -static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req, - int total_len) +static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct dma_pool *pool; - int length = total_len; + int length = blk_rq_payload_bytes(req); struct scatterlist *sg = iod->sg; int dma_len = sg_dma_len(sg); u64 dma_addr = sg_dma_address(sg); @@ -501,7 +500,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req, } static int nvme_map_data(struct nvme_dev *dev, struct request *req, - unsigned size, struct nvme_command *cmnd) + struct nvme_command *cmnd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct request_queue *q = req->q; @@ -519,7 +518,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_ATTR_NO_WARN)) goto out; - if (!nvme_setup_prps(dev, req, size)) + if (!nvme_setup_prps(dev, req)) goto out_unmap; ret = BLK_MQ_RQ_QUEUE_ERROR; @@ -580,7 +579,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_command cmnd; - unsigned map_len; int ret = BLK_MQ_RQ_QUEUE_OK; /* @@ -600,13 +598,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret != BLK_MQ_RQ_QUEUE_OK) return ret; - map_len = nvme_map_len(req); - ret = nvme_init_iod(req, map_len, dev); + ret = nvme_init_iod(req, dev); if (ret != BLK_MQ_RQ_QUEUE_OK) goto out_free_cmd; if (blk_rq_nr_phys_segments(req)) - ret = nvme_map_data(dev, req, map_len, &cmnd); + ret = nvme_map_data(dev, req, &cmnd); if (ret != BLK_MQ_RQ_QUEUE_OK) goto out_cleanup_iod; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 34e564857716..557f29b1f1bb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -981,8 +981,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, } static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, - struct request *rq, unsigned int map_len, - struct nvme_command *c) + struct request *rq, struct nvme_command *c) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; @@ -1014,9 +1013,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, } if (count == 1) { - if (rq_data_dir(rq) == WRITE && - map_len <= nvme_rdma_inline_data_size(queue) && - nvme_rdma_queue_idx(queue)) + if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && + blk_rq_payload_bytes(rq) <= + nvme_rdma_inline_data_size(queue)) return nvme_rdma_map_sg_inline(queue, req, c); if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) @@ -1444,7 +1443,6 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_command *c = sqe->data; bool flush = false; struct ib_device *dev; - unsigned int map_len; int ret; WARN_ON_ONCE(rq->tag < 0); @@ -1462,8 +1460,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); - map_len = nvme_map_len(rq); - ret = nvme_rdma_map_data(queue, rq, map_len, c); + ret = nvme_rdma_map_data(queue, rq, c); if (ret < 0) { dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", ret); From f80de881d8df967488b7343381619efa15019493 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jan 2017 12:29:13 +0100 Subject: [PATCH 222/258] sd: remove __data_len hack for WRITE SAME Now that we have the blk_rq_payload_bytes helper available to determine the actual I/O size we don't need to mess around with __data_len for WRITE SAME. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b1933041da39..1fbb1ecf49f2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -836,7 +836,6 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) struct bio *bio = rq->bio; sector_t sector = blk_rq_pos(rq); unsigned int nr_sectors = blk_rq_sectors(rq); - unsigned int nr_bytes = blk_rq_bytes(rq); int ret; if (sdkp->device->no_write_same) @@ -869,21 +868,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) cmd->transfersize = sdp->sector_size; cmd->allowed = SD_MAX_RETRIES; - - /* - * For WRITE_SAME the data transferred in the DATA IN buffer is - * different from the amount of data actually written to the target. - * - * We set up __data_len to the amount of data transferred from the - * DATA IN buffer so that blk_rq_map_sg set up the proper S/G list - * to transfer a single sector of data first, but then reset it to - * the amount of data to be written right after so that the I/O path - * knows how much to actually write. - */ - rq->__data_len = sdp->sector_size; - ret = scsi_init_io(cmd); - rq->__data_len = nr_bytes; - return ret; + return scsi_init_io(cmd); } static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd) From bef13315e990fd3d3fb4c39013aefd53f06c3657 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jan 2017 15:18:16 -0700 Subject: [PATCH 223/258] block: don't try to discard from __blkdev_issue_zeroout Discard can return -EIO asynchronously if the alignment for the request isn't suitable for the driver, which makes a proper fallback to other methods in __blkdev_issue_zeroout impossible. Thus only issue a sync discard from blkdev_issue_zeroout an don't try discard at all from __blkdev_issue_zeroout as a non-invasive workaround. One more reason why abusing discard for zeroing must die.. Signed-off-by: Christoph Hellwig Reported-by: Eryu Guan Fixes: e73c23ff ("block: add async variant of blkdev_issue_zeroout") Signed-off-by: Jens Axboe --- block/blk-lib.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index ed89c8f4b2a0..f8c82a9b4012 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -301,13 +301,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, if ((sector | nr_sects) & bs_mask) return -EINVAL; - if (discard) { - ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, - BLKDEV_DISCARD_ZERO, biop); - if (ret == 0 || (ret && ret != -EOPNOTSUPP)) - goto out; - } - ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask, biop); if (ret == 0 || (ret && ret != -EOPNOTSUPP)) @@ -370,6 +363,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, struct bio *bio = NULL; struct blk_plug plug; + if (discard) { + if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, + BLKDEV_DISCARD_ZERO)) + return 0; + } + blk_start_plug(&plug); ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, &bio, discard); From 695085b4bc7603551db0b3da897b8bf9893ca218 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 13 Jan 2017 01:11:18 -0500 Subject: [PATCH 224/258] x86/tsc: Add the Intel Denverton Processor to native_calibrate_tsc() The Intel Denverton microserver uses a 25 MHz TSC crystal, so we can derive its exact [*] TSC frequency using CPUID and some arithmetic, eg.: TSC: 1800 MHz (25000000 Hz * 216 / 3 / 1000000) [*] 'exact' is only as good as the crystal, which should be +/- 20ppm Signed-off-by: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/306899f94804aece6d8fa8b4223ede3b48dbb59c.1484287748.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index be3a49ee0356..e41af597aed8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void) crystal_khz = 24000; /* 24.0 MHz */ break; case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; case INTEL_FAM6_ATOM_GOLDMONT: From 453828625731d0ba7218242ef6ec88f59408f368 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 12 Jan 2017 16:53:11 +0100 Subject: [PATCH 225/258] x86/mpx: Use compatible types in comparison to fix sparse error info->si_addr is of type void __user *, so it should be compared against something from the same address space. This fixes the following sparse error: arch/x86/mm/mpx.c:296:27: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Tobias Klauser Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 324e5713d386..af59f808742f 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) * We were not able to extract an address from the instruction, * probably because there was something invalid in it. */ - if (info->si_addr == (void *)-1) { + if (info->si_addr == (void __user *)-1) { err = -EINVAL; goto err_out; } From 63cae12bce9861cec309798d34701cf3da20bc71 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Dec 2016 14:59:00 +0100 Subject: [PATCH 226/258] perf/core: Fix sys_perf_event_open() vs. hotplug There is problem with installing an event in a task that is 'stuck' on an offline CPU. Blocked tasks are not dis-assosciated from offlined CPUs, after all, a blocked task doesn't run and doesn't require a CPU etc.. Only on wakeup do we ammend the situation and place the task on a available CPU. If we hit such a task with perf_install_in_context() we'll loop until either that task wakes up or the CPU comes back online, if the task waking depends on the event being installed, we're stuck. While looking into this issue, I also spotted another problem, if we hit a task with perf_install_in_context() that is in the middle of being migrated, that is we observe the old CPU before sending the IPI, but run the IPI (on the old CPU) while the task is already running on the new CPU, things also go sideways. Rework things to rely on task_curr() -- outside of rq->lock -- which is rather tricky. Imagine the following scenario where we're trying to install the first event into our task 't': CPU0 CPU1 CPU2 (current == t) t->perf_event_ctxp[] = ctx; smp_mb(); cpu = task_cpu(t); switch(t, n); migrate(t, 2); switch(p, t); ctx = t->perf_event_ctxp[]; // must not be NULL smp_function_call(cpu, ..); generic_exec_single() func(); spin_lock(ctx->lock); if (task_curr(t)) // false add_event_to_ctx(); spin_unlock(ctx->lock); perf_event_context_sched_in(); spin_lock(ctx->lock); // sees event So its CPU0's store of t->perf_event_ctxp[] that must not go 'missing'. Because if CPU2's load of that variable were to observe NULL, it would not try to schedule the ctx and we'd have a task running without its counter, which would be 'bad'. As long as we observe !NULL, we'll acquire ctx->lock. If we acquire it first and not see the event yet, then CPU0 must observe task_curr() and retry. If the install happens first, then we must see the event on sched-in and all is well. I think we can translate the first part (until the 'must not be NULL') of the scenario to a litmus test like: C C-peterz { } P0(int *x, int *y) { int r1; WRITE_ONCE(*x, 1); smp_mb(); r1 = READ_ONCE(*y); } P1(int *y, int *z) { WRITE_ONCE(*y, 1); smp_store_release(z, 1); } P2(int *x, int *z) { int r1; int r2; r1 = smp_load_acquire(z); smp_mb(); r2 = READ_ONCE(*x); } exists (0:r1=0 /\ 2:r1=1 /\ 2:r2=0) Where: x is perf_event_ctxp[], y is our tasks's CPU, and z is our task being placed on the rq of CPU2. The P0 smp_mb() is the one added by this patch, ordering the store to perf_event_ctxp[] from find_get_context() and the load of task_cpu() in task_function_call(). The smp_store_release/smp_load_acquire model the RCpc locking of the rq->lock and the smp_mb() of P2 is the context switch switching from whatever CPU2 was running to our task 't'. This litmus test evaluates into: Test C-peterz Allowed States 7 0:r1=0; 2:r1=0; 2:r2=0; 0:r1=0; 2:r1=0; 2:r2=1; 0:r1=0; 2:r1=1; 2:r2=1; 0:r1=1; 2:r1=0; 2:r2=0; 0:r1=1; 2:r1=0; 2:r2=1; 0:r1=1; 2:r1=1; 2:r2=0; 0:r1=1; 2:r1=1; 2:r2=1; No Witnesses Positive: 0 Negative: 7 Condition exists (0:r1=0 /\ 2:r1=1 /\ 2:r2=0) Observation C-peterz Never 0 7 Hash=e427f41d9146b2a5445101d3e2fcaa34 And the strong and weak model agree. Reported-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Will Deacon Cc: jeremy.linton@arm.com Link: http://lkml.kernel.org/r/20161209135900.GU3174@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 70 ++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index ab15509fab8c..72ce7d63e561 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2249,7 +2249,7 @@ static int __perf_install_in_context(void *info) struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); struct perf_event_context *task_ctx = cpuctx->task_ctx; - bool activate = true; + bool reprogram = true; int ret = 0; raw_spin_lock(&cpuctx->ctx.lock); @@ -2257,27 +2257,26 @@ static int __perf_install_in_context(void *info) raw_spin_lock(&ctx->lock); task_ctx = ctx; - /* If we're on the wrong CPU, try again */ - if (task_cpu(ctx->task) != smp_processor_id()) { + reprogram = (ctx->task == current); + + /* + * If the task is running, it must be running on this CPU, + * otherwise we cannot reprogram things. + * + * If its not running, we don't care, ctx->lock will + * serialize against it becoming runnable. + */ + if (task_curr(ctx->task) && !reprogram) { ret = -ESRCH; goto unlock; } - /* - * If we're on the right CPU, see if the task we target is - * current, if not we don't have to activate the ctx, a future - * context switch will do that for us. - */ - if (ctx->task != current) - activate = false; - else - WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx); - + WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx); } else if (task_ctx) { raw_spin_lock(&task_ctx->lock); } - if (activate) { + if (reprogram) { ctx_sched_out(ctx, cpuctx, EVENT_TIME); add_event_to_ctx(event, ctx); ctx_resched(cpuctx, task_ctx); @@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx, /* * Installing events is tricky because we cannot rely on ctx->is_active * to be set in case this is the nr_events 0 -> 1 transition. + * + * Instead we use task_curr(), which tells us if the task is running. + * However, since we use task_curr() outside of rq::lock, we can race + * against the actual state. This means the result can be wrong. + * + * If we get a false positive, we retry, this is harmless. + * + * If we get a false negative, things are complicated. If we are after + * perf_event_context_sched_in() ctx::lock will serialize us, and the + * value must be correct. If we're before, it doesn't matter since + * perf_event_context_sched_in() will program the counter. + * + * However, this hinges on the remote context switch having observed + * our task->perf_event_ctxp[] store, such that it will in fact take + * ctx::lock in perf_event_context_sched_in(). + * + * We do this by task_function_call(), if the IPI fails to hit the task + * we know any future context switch of task must see the + * perf_event_ctpx[] store. */ -again: + /* - * Cannot use task_function_call() because we need to run on the task's - * CPU regardless of whether its current or not. + * This smp_mb() orders the task->perf_event_ctxp[] store with the + * task_cpu() load, such that if the IPI then does not find the task + * running, a future context switch of that task must observe the + * store. */ - if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event)) + smp_mb(); +again: + if (!task_function_call(task, __perf_install_in_context, event)) return; raw_spin_lock_irq(&ctx->lock); @@ -2348,12 +2370,16 @@ again: raw_spin_unlock_irq(&ctx->lock); return; } - raw_spin_unlock_irq(&ctx->lock); /* - * Since !ctx->is_active doesn't mean anything, we must IPI - * unconditionally. + * If the task is not running, ctx->lock will avoid it becoming so, + * thus we can safely install the event. */ - goto again; + if (task_curr(task)) { + raw_spin_unlock_irq(&ctx->lock); + goto again; + } + add_event_to_ctx(event, ctx); + raw_spin_unlock_irq(&ctx->lock); } /* From 321027c1fe77f892f4ea07846aeae08cefbbb290 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Jan 2017 21:09:50 +0100 Subject: [PATCH 227/258] perf/core: Fix concurrent sys_perf_event_open() vs. 'move_group' race Di Shen reported a race between two concurrent sys_perf_event_open() calls where both try and move the same pre-existing software group into a hardware context. The problem is exactly that described in commit: f63a8daa5812 ("perf: Fix event->ctx locking") ... where, while we wait for a ctx->mutex acquisition, the event->ctx relation can have changed under us. That very same commit failed to recognise sys_perf_event_context() as an external access vector to the events and thereby didn't apply the established locking rules correctly. So while one sys_perf_event_open() call is stuck waiting on mutex_lock_double(), the other (which owns said locks) moves the group about. So by the time the former sys_perf_event_open() acquires the locks, the context we've acquired is stale (and possibly dead). Apply the established locking rules as per perf_event_ctx_lock_nested() to the mutex_lock_double() for the 'move_group' case. This obviously means we need to validate state after we acquire the locks. Reported-by: Di Shen (Keen Lab) Tested-by: John Dias Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Min Chong Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: f63a8daa5812 ("perf: Fix event->ctx locking") Link: http://lkml.kernel.org/r/20170106131444.GZ3174@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 58 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 72ce7d63e561..cbc5937265da 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9529,6 +9529,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) return 0; } +/* + * Variation on perf_event_ctx_lock_nested(), except we take two context + * mutexes. + */ +static struct perf_event_context * +__perf_event_ctx_lock_double(struct perf_event *group_leader, + struct perf_event_context *ctx) +{ + struct perf_event_context *gctx; + +again: + rcu_read_lock(); + gctx = READ_ONCE(group_leader->ctx); + if (!atomic_inc_not_zero(&gctx->refcount)) { + rcu_read_unlock(); + goto again; + } + rcu_read_unlock(); + + mutex_lock_double(&gctx->mutex, &ctx->mutex); + + if (group_leader->ctx != gctx) { + mutex_unlock(&ctx->mutex); + mutex_unlock(&gctx->mutex); + put_ctx(gctx); + goto again; + } + + return gctx; +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -9772,12 +9803,31 @@ SYSCALL_DEFINE5(perf_event_open, } if (move_group) { - gctx = group_leader->ctx; - mutex_lock_double(&gctx->mutex, &ctx->mutex); + gctx = __perf_event_ctx_lock_double(group_leader, ctx); + if (gctx->task == TASK_TOMBSTONE) { err = -ESRCH; goto err_locked; } + + /* + * Check if we raced against another sys_perf_event_open() call + * moving the software group underneath us. + */ + if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { + /* + * If someone moved the group out from under us, check + * if this new event wound up on the same ctx, if so + * its the regular !move_group case, otherwise fail. + */ + if (gctx != ctx) { + err = -EINVAL; + goto err_locked; + } else { + perf_event_ctx_unlock(group_leader, gctx); + move_group = 0; + } + } } else { mutex_lock(&ctx->mutex); } @@ -9879,7 +9929,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_unpin_context(ctx); if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); if (task) { @@ -9905,7 +9955,7 @@ SYSCALL_DEFINE5(perf_event_open, err_locked: if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); /* err_file: */ fput(event_file); From 475113d937adfd150eb82b5e2c5507125a68e7af Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 28 Dec 2016 14:31:03 +0100 Subject: [PATCH 228/258] perf/x86/intel: Account interrupts for PEBS errors It's possible to set up PEBS events to get only errors and not any data, like on SNB-X (model 45) and IVB-EP (model 62) via 2 perf commands running simultaneously: taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10 This leads to a soft lock up, because the error path of the intel_pmu_drain_pebs_nhm() does not account event->hw.interrupt for error PEBS interrupts, so in case you're getting ONLY errors you don't have a way to stop the event when it's over the max_samples_per_tick limit: NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816] ... RIP: 0010:[] [] smp_call_function_single+0xe2/0x140 ... Call Trace: ? trace_hardirqs_on_caller+0xf5/0x1b0 ? perf_cgroup_attach+0x70/0x70 perf_install_in_context+0x199/0x1b0 ? ctx_resched+0x90/0x90 SYSC_perf_event_open+0x641/0xf90 SyS_perf_event_open+0x9/0x10 do_syscall_64+0x6c/0x1f0 entry_SYSCALL64_slow_path+0x25/0x25 Add perf_event_account_interrupt() which does the interrupt and frequency checks and call it from intel_pmu_drain_pebs_nhm()'s error path. We keep the pending_kill and pending_wakeup logic only in the __perf_event_overflow() path, because they make sense only if there's any data to deliver. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 6 ++++- include/linux/perf_event.h | 1 + kernel/events/core.c | 47 +++++++++++++++++++++++++------------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index be202390bbd3..9dfeeeca0ea8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; /* log dropped samples number */ - if (error[bit]) + if (error[bit]) { perf_log_lost_samples(event, error[bit]); + if (perf_event_account_interrupt(event)) + x86_pmu_stop(event, 0); + } + if (counts[bit]) { __intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecdb9817..78ed8105e64d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event); extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_task_tick(void); +extern int perf_event_account_interrupt(struct perf_event *event); #else /* !CONFIG_PERF_EVENTS: */ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, diff --git a/kernel/events/core.c b/kernel/events/core.c index cbc5937265da..110b38a58493 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -/* - * Generic event overflow handling, sampling. - */ - -static int __perf_event_overflow(struct perf_event *event, - int throttle, struct perf_sample_data *data, - struct pt_regs *regs) +static int +__perf_event_account_interrupt(struct perf_event *event, int throttle) { - int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; - u64 seq; int ret = 0; - - /* - * Non-sampling counters might still use the PMI to fold short - * hardware counters, ignore those. - */ - if (unlikely(!is_sampling_event(event))) - return 0; + u64 seq; seq = __this_cpu_read(perf_throttled_seq); if (seq != hwc->interrupts_seq) { @@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event, perf_adjust_period(event, delta, hwc->last_period, true); } + return ret; +} + +int perf_event_account_interrupt(struct perf_event *event) +{ + return __perf_event_account_interrupt(event, 1); +} + +/* + * Generic event overflow handling, sampling. + */ + +static int __perf_event_overflow(struct perf_event *event, + int throttle, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int events = atomic_read(&event->event_limit); + int ret = 0; + + /* + * Non-sampling counters might still use the PMI to fold short + * hardware counters, ignore those. + */ + if (unlikely(!is_sampling_event(event))) + return 0; + + ret = __perf_event_account_interrupt(event, throttle); + /* * XXX event_limit might not quite work as expected on inherited * events From 18e7a45af91acdde99d3aa1372cc40e1f8142f7b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Jan 2017 15:24:54 +0100 Subject: [PATCH 229/258] perf/x86: Reject non sampling events with precise_ip As Peter suggested [1] rejecting non sampling PEBS events, because they dont make any sense and could cause bugs in the NMI handler [2]. [1] http://lkml.kernel.org/r/20170103094059.GC3093@worktop [2] http://lkml.kernel.org/r/1482931866-6018-3-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170103142454.GA26251@krava Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 019c5887b698..1635c0c8df23 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip > precise) return -EOPNOTSUPP; + + /* There's no sense in having PEBS for non sampling events: */ + if (!is_sampling_event(event)) + return -EINVAL; } /* * check that PEBS LBR correction does not conflict with From c7334ce814f7e5d8fc1f9b3126cda0640c2f81b3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 14 Jan 2017 14:09:03 +0100 Subject: [PATCH 230/258] Revert "driver core: Add deferred_probe attribute to devices in sysfs" This reverts commit 6751667a29d6fd64afb9ce30567ad616b68ed789. Rob Herring objected to it, and a replacement for it will be added using debugfs in the future. Cc: Ben Hutchings Reported-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-deferred_probe | 12 ------------ drivers/base/base.h | 2 -- drivers/base/core.c | 7 ------- drivers/base/dd.c | 13 ------------- 4 files changed, 34 deletions(-) delete mode 100644 Documentation/ABI/testing/sysfs-devices-deferred_probe diff --git a/Documentation/ABI/testing/sysfs-devices-deferred_probe b/Documentation/ABI/testing/sysfs-devices-deferred_probe deleted file mode 100644 index 58553d7a321f..000000000000 --- a/Documentation/ABI/testing/sysfs-devices-deferred_probe +++ /dev/null @@ -1,12 +0,0 @@ -What: /sys/devices/.../deferred_probe -Date: August 2016 -Contact: Ben Hutchings -Description: - The /sys/devices/.../deferred_probe attribute is - present for all devices. If a driver detects during - probing a device that a related device is not yet - ready, it may defer probing of the first device. The - kernel will retry probing the first device after any - other device is successfully probed. This attribute - reads as 1 if probing of this device is currently - deferred, or 0 otherwise. diff --git a/drivers/base/base.h b/drivers/base/base.h index ada9dce34e6d..e19b1008e5fb 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -141,8 +141,6 @@ extern void device_unblock_probing(void); extern struct kset *devices_kset; extern void devices_kset_move_last(struct device *dev); -extern struct device_attribute dev_attr_deferred_probe; - #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS) extern void module_add_driver(struct module *mod, struct device_driver *drv); extern void module_remove_driver(struct device_driver *drv); diff --git a/drivers/base/core.c b/drivers/base/core.c index 020ea7f05520..8c25e68e67d7 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1060,14 +1060,8 @@ static int device_add_attrs(struct device *dev) goto err_remove_dev_groups; } - error = device_create_file(dev, &dev_attr_deferred_probe); - if (error) - goto err_remove_online; - return 0; - err_remove_online: - device_remove_file(dev, &dev_attr_online); err_remove_dev_groups: device_remove_groups(dev, dev->groups); err_remove_type_groups: @@ -1085,7 +1079,6 @@ static void device_remove_attrs(struct device *dev) struct class *class = dev->class; const struct device_type *type = dev->type; - device_remove_file(dev, &dev_attr_deferred_probe); device_remove_file(dev, &dev_attr_online); device_remove_groups(dev, dev->groups); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a8b258e5407b..a1fbf55c4d3a 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -53,19 +53,6 @@ static LIST_HEAD(deferred_probe_pending_list); static LIST_HEAD(deferred_probe_active_list); static atomic_t deferred_trigger_count = ATOMIC_INIT(0); -static ssize_t deferred_probe_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - bool value; - - mutex_lock(&deferred_probe_mutex); - value = !list_empty(&dev->p->deferred_probe); - mutex_unlock(&deferred_probe_mutex); - - return sprintf(buf, "%d\n", value); -} -DEVICE_ATTR_RO(deferred_probe); - /* * In some cases, like suspend to RAM or hibernation, It might be reasonable * to prohibit probing of devices as it could be unsafe. From 0100a3e67a9cef64d72cd3a1da86f3ddbee50363 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 12 Dec 2016 18:42:28 -0500 Subject: [PATCH 231/258] efi/x86: Prune invalid memory map entries and fix boot regression Some machines, such as the Lenovo ThinkPad W541 with firmware GNET80WW (2.28), include memory map entries with phys_addr=0x0 and num_pages=0. These machines fail to boot after the following commit, commit 8e80632fb23f ("efi/esrt: Use efi_mem_reserve() and avoid a kmalloc()") Fix this by removing such bogus entries from the memory map. Furthermore, currently the log output for this case (with efi=debug) looks like: [ 0.000000] efi: mem45: [Reserved | | | | | | | | | | | | ] range=[0x0000000000000000-0xffffffffffffffff] (0MB) This is clearly wrong, and also not as informative as it could be. This patch changes it so that if we find obviously invalid memory map entries, we print an error and skip those entries. It also detects the display of the address range calculation overflow, so the new output is: [ 0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries: [ 0.000000] efi: mem45: [Reserved | | | | | | | | | | | | ] range=[0x0000000000000000-0x0000000000000000] (invalid) It also detects memory map sizes that would overflow the physical address, for example phys_addr=0xfffffffffffff000 and num_pages=0x0200000000000001, and prints: [ 0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries: [ 0.000000] efi: mem45: [Reserved | | | | | | | | | | | | ] range=[phys_addr=0xfffffffffffff000-0x20ffffffffffffffff] (invalid) It then removes these entries from the memory map. Signed-off-by: Peter Jones Signed-off-by: Ard Biesheuvel [ardb: refactor for clarity with no functional changes, avoid PAGE_SHIFT] Signed-off-by: Matt Fleming [Matt: Include bugzilla info in commit log] Cc: # v4.9+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://bugzilla.kernel.org/show_bug.cgi?id=191121 Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 66 +++++++++++++++++++++++++++++++++++++ include/linux/efi.h | 1 + 2 files changed, 67 insertions(+) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 936a488d6cf6..274dfc481849 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -210,6 +210,70 @@ int __init efi_memblock_x86_reserve_range(void) return 0; } +#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT) +#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT) +#define U64_HIGH_BIT (~(U64_MAX >> 1)) + +static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i) +{ + u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1; + u64 end_hi = 0; + char buf[64]; + + if (md->num_pages == 0) { + end = 0; + } else if (md->num_pages > EFI_PAGES_MAX || + EFI_PAGES_MAX - md->num_pages < + (md->phys_addr >> EFI_PAGE_SHIFT)) { + end_hi = (md->num_pages & OVERFLOW_ADDR_MASK) + >> OVERFLOW_ADDR_SHIFT; + + if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT)) + end_hi += 1; + } else { + return true; + } + + pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n"); + + if (end_hi) { + pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end_hi, end); + } else { + pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end); + } + return false; +} + +static void __init efi_clean_memmap(void) +{ + efi_memory_desc_t *out = efi.memmap.map; + const efi_memory_desc_t *in = out; + const efi_memory_desc_t *end = efi.memmap.map_end; + int i, n_removal; + + for (i = n_removal = 0; in < end; i++) { + if (efi_memmap_entry_valid(in, i)) { + if (out != in) + memcpy(out, in, efi.memmap.desc_size); + out = (void *)out + efi.memmap.desc_size; + } else { + n_removal++; + } + in = (void *)in + efi.memmap.desc_size; + } + + if (n_removal > 0) { + u64 size = efi.memmap.nr_map - n_removal; + + pr_warn("Removing %d invalid memory map entries.\n", n_removal); + efi_memmap_install(efi.memmap.phys_map, size); + } +} + void __init efi_print_memmap(void) { efi_memory_desc_t *md; @@ -472,6 +536,8 @@ void __init efi_init(void) } } + efi_clean_memmap(); + if (efi_enabled(EFI_DBG)) efi_print_memmap(); } diff --git a/include/linux/efi.h b/include/linux/efi.h index 0c5420208c40..5b1af30ece55 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -103,6 +103,7 @@ typedef struct { #define EFI_PAGE_SHIFT 12 #define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) +#define EFI_PAGES_MAX (U64_MAX >> EFI_PAGE_SHIFT) typedef struct { u32 type; From a12f1ae61c489076a9aeb90bddca7722bf330df3 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 13 Dec 2016 12:09:56 -0800 Subject: [PATCH 232/258] aio: fix lock dep warning lockdep reports a warnning. file_start_write/file_end_write only acquire/release the lock for regular files. So checking the files in aio side too. [ 453.532141] ------------[ cut here ]------------ [ 453.533011] WARNING: CPU: 1 PID: 1298 at ../kernel/locking/lockdep.c:3514 lock_release+0x434/0x670 [ 453.533011] DEBUG_LOCKS_WARN_ON(depth <= 0) [ 453.533011] Modules linked in: [ 453.533011] CPU: 1 PID: 1298 Comm: fio Not tainted 4.9.0+ #964 [ 453.533011] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 04/01/2014 [ 453.533011] ffff8803a24b7a70 ffffffff8196cffb ffff8803a24b7ae8 0000000000000000 [ 453.533011] ffff8803a24b7ab8 ffffffff81091ee1 ffff8803a5dba700 00000dba00000008 [ 453.533011] ffffed0074496f59 ffff8803a5dbaf54 ffff8803ae0f8488 fffffffffffffdef [ 453.533011] Call Trace: [ 453.533011] [] dump_stack+0x67/0x9c [ 453.533011] [] __warn+0x111/0x130 [ 453.533011] [] warn_slowpath_fmt+0x97/0xb0 [ 453.533011] [] ? __warn+0x130/0x130 [ 453.533011] [] ? blk_finish_plug+0x29/0x60 [ 453.533011] [] lock_release+0x434/0x670 [ 453.533011] [] ? import_single_range+0xd4/0x110 [ 453.533011] [] ? rw_verify_area+0x65/0x140 [ 453.533011] [] ? aio_write+0x1f6/0x280 [ 453.533011] [] aio_write+0x229/0x280 [ 453.533011] [] ? aio_complete+0x640/0x640 [ 453.533011] [] ? debug_check_no_locks_freed+0x1a0/0x1a0 [ 453.533011] [] ? debug_lockdep_rcu_enabled.part.2+0x1a/0x30 [ 453.533011] [] ? debug_lockdep_rcu_enabled+0x35/0x40 [ 453.533011] [] ? __might_fault+0x7e/0xf0 [ 453.533011] [] do_io_submit+0x94c/0xb10 [ 453.533011] [] ? do_io_submit+0x23e/0xb10 [ 453.533011] [] ? SyS_io_destroy+0x270/0x270 [ 453.533011] [] ? mark_held_locks+0x23/0xc0 [ 453.533011] [] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 453.533011] [] SyS_io_submit+0x10/0x20 [ 453.533011] [] entry_SYSCALL_64_fastpath+0x18/0xad [ 453.533011] [] ? trace_hardirqs_off_caller+0xc0/0x110 [ 453.533011] ---[ end trace b2fbe664d1cc0082 ]--- Cc: Dmitry Monakhov Cc: Jan Kara Cc: Christoph Hellwig Cc: Al Viro Reviewed-by: Christoph Hellwig Signed-off-by: Shaohua Li Signed-off-by: Al Viro --- fs/aio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 4ab67e8cb776..873b4ca82ccb 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1085,7 +1085,8 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2) * Tell lockdep we inherited freeze protection from submission * thread. */ - __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); + if (S_ISREG(file_inode(file)->i_mode)) + __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); file_end_write(file); } @@ -1525,7 +1526,8 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, * by telling it the lock got released so that it doesn't * complain about held lock when we return to userspace. */ - __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); + if (S_ISREG(file_inode(file)->i_mode)) + __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } kfree(iovec); return ret; From 4d22c75d4c7b5c5f4bd31054f09103ee490878fd Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 11 Jan 2017 13:25:00 -0600 Subject: [PATCH 233/258] coredump: Ensure proper size of sparse core files If the last section of a core file ends with an unmapped or zero page, the size of the file does not correspond with the last dump_skip() call. gdb complains that the file is truncated and can be confusing to users. After all of the vma sections are written, make sure that the file size is no smaller than the current file position. This problem can be demonstrated with gdb's bigcore testcase on the sparc architecture. Signed-off-by: Dave Kleikamp Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Al Viro --- fs/binfmt_elf.c | 1 + fs/coredump.c | 18 ++++++++++++++++++ include/linux/coredump.h | 1 + 3 files changed, 20 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 29a02daf08a9..422370293cfd 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2298,6 +2298,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; } } + dump_truncate(cprm); if (!elf_core_write_extra_data(cprm)) goto end_coredump; diff --git a/fs/coredump.c b/fs/coredump.c index e525b6017cdf..ae6b05629ca1 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -833,3 +833,21 @@ int dump_align(struct coredump_params *cprm, int align) return mod ? dump_skip(cprm, align - mod) : 1; } EXPORT_SYMBOL(dump_align); + +/* + * Ensures that file size is big enough to contain the current file + * postion. This prevents gdb from complaining about a truncated file + * if the last "write" to the file was dump_skip. + */ +void dump_truncate(struct coredump_params *cprm) +{ + struct file *file = cprm->file; + loff_t offset; + + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + offset = file->f_op->llseek(file, 0, SEEK_CUR); + if (i_size_read(file->f_mapping->host) < offset) + do_truncate(file->f_path.dentry, offset, 0, file); + } +} +EXPORT_SYMBOL(dump_truncate); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index d016a121a8c4..28ffa94aed6b 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -14,6 +14,7 @@ struct coredump_params; extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); +extern void dump_truncate(struct coredump_params *cprm); #ifdef CONFIG_COREDUMP extern void do_coredump(const siginfo_t *siginfo); #else From b9dc6f65bc5e232d1c05fe34b5daadc7e8bbf1fb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Jan 2017 19:33:08 -0500 Subject: [PATCH 234/258] fix a fencepost error in pipe_advance() The logics in pipe_advance() used to release all buffers past the new position failed in cases when the number of buffers to release was equal to pipe->buffers. If that happened, none of them had been released, leaving pipe full. Worse, it was trivial to trigger and we end up with pipe full of uninitialized pages. IOW, it's an infoleak. Cc: stable@vger.kernel.org # v4.9 Reported-by: "Alan J. Wylie" Tested-by: "Alan J. Wylie" Signed-off-by: Al Viro --- lib/iov_iter.c | 66 ++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 25f572303801..e68604ae3ced 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -730,43 +730,50 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, } EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); -static void pipe_advance(struct iov_iter *i, size_t size) +static inline void pipe_truncate(struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; - struct pipe_buffer *buf; - int idx = i->idx; - size_t off = i->iov_offset, orig_sz; - - if (unlikely(i->count < size)) - size = i->count; - orig_sz = size; - - if (size) { - if (off) /* make it relative to the beginning of buffer */ - size += off - pipe->bufs[idx].offset; - while (1) { - buf = &pipe->bufs[idx]; - if (size <= buf->len) - break; - size -= buf->len; - idx = next_idx(idx, pipe); - } - buf->len = size; - i->idx = idx; - off = i->iov_offset = buf->offset + size; - } - if (off) - idx = next_idx(idx, pipe); if (pipe->nrbufs) { - int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - /* [curbuf,unused) is in use. Free [idx,unused) */ - while (idx != unused) { + size_t off = i->iov_offset; + int idx = i->idx; + int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1); + if (off) { + pipe->bufs[idx].len = off - pipe->bufs[idx].offset; + idx = next_idx(idx, pipe); + nrbufs++; + } + while (pipe->nrbufs > nrbufs) { pipe_buf_release(pipe, &pipe->bufs[idx]); idx = next_idx(idx, pipe); pipe->nrbufs--; } } - i->count -= orig_sz; +} + +static void pipe_advance(struct iov_iter *i, size_t size) +{ + struct pipe_inode_info *pipe = i->pipe; + if (unlikely(i->count < size)) + size = i->count; + if (size) { + struct pipe_buffer *buf; + size_t off = i->iov_offset, left = size; + int idx = i->idx; + if (off) /* make it relative to the beginning of buffer */ + left += off - pipe->bufs[idx].offset; + while (1) { + buf = &pipe->bufs[idx]; + if (left <= buf->len) + break; + left -= buf->len; + idx = next_idx(idx, pipe); + } + i->idx = idx; + i->iov_offset = buf->offset + left; + } + i->count -= size; + /* ... and discard everything past that point */ + pipe_truncate(i); } void iov_iter_advance(struct iov_iter *i, size_t size) @@ -826,6 +833,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction, size_t count) { BUG_ON(direction != ITER_PIPE); + WARN_ON(pipe->nrbufs == pipe->buffers); i->type = direction; i->pipe = pipe; i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); From 602d9858f07c72eab64f5f00e2fae55f9902cfbe Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Wed, 11 Jan 2017 21:56:31 +0300 Subject: [PATCH 235/258] swiotlb: ensure that page-sized mappings are page-aligned Some drivers do depend on page mappings to be page aligned. Swiotlb already enforces such alignment for mappings greater than page, extend that to page-sized mappings as well. Without this fix, nvme hits BUG() in nvme_setup_prps(), because that routine assumes page-aligned mappings. Signed-off-by: Nikita Yushchenko Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Konrad Rzeszutek Wilk --- lib/swiotlb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 975b8fc4f1e1..a8d74a733a38 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -483,11 +483,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); /* - * For mappings greater than a page, we limit the stride (and - * hence alignment) to a page size. + * For mappings greater than or equal to a page, we limit the stride + * (and hence alignment) to a page size. */ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size > PAGE_SIZE) + if (size >= PAGE_SIZE) stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); else stride = 1; From 49def1853334396f948dcb4cedb9347abb318df5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Jan 2017 16:21:59 -0800 Subject: [PATCH 236/258] Linux 4.10-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5f1a84735ff6..96e2352d10a8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Roaring Lionus # *DOCUMENTATION* From 75f01a4c9cc291ff5cb28ca1216adb163b7a20ee Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Thu, 12 Jan 2017 19:33:18 -0500 Subject: [PATCH 237/258] openvswitch: maintain correct checksum state in conntrack actions When executing conntrack actions on skbuffs with checksum mode CHECKSUM_COMPLETE, the checksum must be updated to account for header pushes and pulls. Otherwise we get "hw csum failure" logs similar to this (ICMP packet received on geneve tunnel via ixgbe NIC): [ 405.740065] genev_sys_6081: hw csum failure [ 405.740106] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G I 4.10.0-rc3+ #1 [ 405.740108] Call Trace: [ 405.740110] [ 405.740113] dump_stack+0x63/0x87 [ 405.740116] netdev_rx_csum_fault+0x3a/0x40 [ 405.740118] __skb_checksum_complete+0xcf/0xe0 [ 405.740120] nf_ip_checksum+0xc8/0xf0 [ 405.740124] icmp_error+0x1de/0x351 [nf_conntrack_ipv4] [ 405.740132] nf_conntrack_in+0xe1/0x550 [nf_conntrack] [ 405.740137] ? find_bucket.isra.2+0x62/0x70 [openvswitch] [ 405.740143] __ovs_ct_lookup+0x95/0x980 [openvswitch] [ 405.740145] ? netif_rx_internal+0x44/0x110 [ 405.740149] ovs_ct_execute+0x147/0x4b0 [openvswitch] [ 405.740153] do_execute_actions+0x22e/0xa70 [openvswitch] [ 405.740157] ovs_execute_actions+0x40/0x120 [openvswitch] [ 405.740161] ovs_dp_process_packet+0x84/0x120 [openvswitch] [ 405.740166] ovs_vport_receive+0x73/0xd0 [openvswitch] [ 405.740168] ? udp_rcv+0x1a/0x20 [ 405.740170] ? ip_local_deliver_finish+0x93/0x1e0 [ 405.740172] ? ip_local_deliver+0x6f/0xe0 [ 405.740174] ? ip_rcv_finish+0x3a0/0x3a0 [ 405.740176] ? ip_rcv_finish+0xdb/0x3a0 [ 405.740177] ? ip_rcv+0x2a7/0x400 [ 405.740180] ? __netif_receive_skb_core+0x970/0xa00 [ 405.740185] netdev_frame_hook+0xd3/0x160 [openvswitch] [ 405.740187] __netif_receive_skb_core+0x1dc/0xa00 [ 405.740194] ? ixgbe_clean_rx_irq+0x46d/0xa20 [ixgbe] [ 405.740197] __netif_receive_skb+0x18/0x60 [ 405.740199] netif_receive_skb_internal+0x40/0xb0 [ 405.740201] napi_gro_receive+0xcd/0x120 [ 405.740204] gro_cell_poll+0x57/0x80 [geneve] [ 405.740206] net_rx_action+0x260/0x3c0 [ 405.740209] __do_softirq+0xc9/0x28c [ 405.740211] irq_exit+0xd9/0xf0 [ 405.740213] do_IRQ+0x51/0xd0 [ 405.740215] common_interrupt+0x93/0x93 Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Signed-off-by: Lance Richardson Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 6b78bab27755..54253ea5976e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -514,7 +514,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, int hooknum, nh_off, err = NF_ACCEPT; nh_off = skb_network_offset(skb); - skb_pull(skb, nh_off); + skb_pull_rcsum(skb, nh_off); /* See HOOK2MANIP(). */ if (maniptype == NF_NAT_MANIP_SRC) @@ -579,6 +579,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, err = nf_nat_packet(ct, ctinfo, hooknum, skb); push: skb_push(skb, nh_off); + skb_postpush_rcsum(skb, skb->data, nh_off); return err; } @@ -886,7 +887,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, /* The conntrack module expects to be working at L3. */ nh_ofs = skb_network_offset(skb); - skb_pull(skb, nh_ofs); + skb_pull_rcsum(skb, nh_ofs); if (key->ip.frag != OVS_FRAG_TYPE_NONE) { err = handle_fragments(net, key, info->zone.id, skb); @@ -900,6 +901,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, err = ovs_ct_lookup(net, key, info, skb); skb_push(skb, nh_ofs); + skb_postpush_rcsum(skb, skb->data, nh_ofs); if (err) kfree_skb(skb); return err; From ee6ff743e3a4b697e8286054667d7e4e1b56510d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 12:05:03 +0100 Subject: [PATCH 238/258] mmc: core: Restore parts of the polling policy when switch to HS/HS DDR Regressions for not being able to detect an eMMC HS DDR mode card has been reported for the sdhci-esdhc-imx driver, but potentially other sdhci variants may suffer from the similar problem. The commit e173f8911f09 ("mmc: core: Update CMD13 polling policy when switch to HS DDR mode"), is causing the problem. It seems that change moved one step to far, regarding changing the host's timing before polling for a busy card. To fix this, let's move back to the behaviour when the host's timing is updated after the polling, but before the switch status is fetched and validated. In cases when polling with CMD13, we keep validating the switch status at each attempt. However, to align with the other card busy detections mechanism, let's fetch and validate the switch status also after the host's timing is updated. Reported-by: Clemens Gruber Reported-by: Gary Bisson Fixes: e173f8911f09 ("mmc: core: Update CMD13 polling policy when switch..") Cc: Shawn Lin Cc: Dong Aisheng Cc: Haibo Chen Signed-off-by: Ulf Hansson Tested-by: Clemens Gruber Tested-by: Jagan Teki Reviewed-by: Shawn Lin Tested-by: Haibo Chen Reviewed-by: Dong Aisheng --- drivers/mmc/core/mmc_ops.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index b11c3455b040..e6ea8503f40c 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -506,9 +506,6 @@ static int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms, } } while (busy); - if (host->ops->card_busy && send_status) - return mmc_switch_status(card); - return 0; } @@ -577,24 +574,26 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, if (!use_busy_signal) goto out; - /* Switch to new timing before poll and check switch status. */ - if (timing) - mmc_set_timing(host, timing); - /*If SPI or used HW busy detection above, then we don't need to poll. */ if (((host->caps & MMC_CAP_WAIT_WHILE_BUSY) && use_r1b_resp) || - mmc_host_is_spi(host)) { - if (send_status) - err = mmc_switch_status(card); + mmc_host_is_spi(host)) goto out_tim; - } /* Let's try to poll to find out when the command is completed. */ err = mmc_poll_for_busy(card, timeout_ms, send_status, retry_crc_err); + if (err) + goto out; out_tim: - if (err && timing) - mmc_set_timing(host, old_timing); + /* Switch to new timing before check switch status. */ + if (timing) + mmc_set_timing(host, timing); + + if (send_status) { + err = mmc_switch_status(card); + if (err && timing) + mmc_set_timing(host, old_timing); + } out: mmc_retune_release(host); From 8cf699ec849f4ca1413cea01289bd7d37dbcc626 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Jan 2017 08:39:24 -0800 Subject: [PATCH 239/258] mlx4: do not call napi_schedule() without care Disable BH around the call to napi_schedule() to avoid following warning [ 52.095499] NOHZ: local_softirq_pending 08 [ 52.421291] NOHZ: local_softirq_pending 08 [ 52.608313] NOHZ: local_softirq_pending 08 Fixes: 8d59de8f7bb3 ("net/mlx4_en: Process all completions in RX rings after port goes up") Signed-off-by: Eric Dumazet Cc: Erez Shitrit Cc: Eugenia Emantayev Cc: Tariq Toukan Acked-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4910d9af1933..761f8b12399c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1748,8 +1748,11 @@ int mlx4_en_start_port(struct net_device *dev) /* Process all completions if exist to prevent * the queues freezing if they are full */ - for (i = 0; i < priv->rx_ring_num; i++) + for (i = 0; i < priv->rx_ring_num; i++) { + local_bh_disable(); napi_schedule(&priv->rx_cq[i]->napi); + local_bh_enable(); + } netif_tx_start_all_queues(dev); netif_device_attach(dev); From 8ec3e8a192ba6f13be4522ee81227c792c86fb1a Mon Sep 17 00:00:00 2001 From: Masaru Nagai Date: Mon, 16 Jan 2017 11:45:21 +0100 Subject: [PATCH 240/258] ravb: do not use zero-length alignment DMA descriptor Due to alignment requirements of the hardware transmissions are split into two DMA descriptors, a small padding descriptor of 0 - 3 bytes in length followed by a descriptor for rest of the packet. In the case of IP packets the first descriptor will never be zero due to the way that the stack aligns buffers for IP packets. However, for non-IP packets it may be zero. In that case it has been reported that timeouts occur, presumably because transmission stops at the first zero-length DMA descriptor and thus the packet is not transmitted. However, in my environment a BUG is triggered as follows: [ 20.381417] ------------[ cut here ]------------ [ 20.386054] kernel BUG at lib/swiotlb.c:495! [ 20.390324] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 20.395805] Modules linked in: [ 20.398862] CPU: 0 PID: 2089 Comm: mz Not tainted 4.10.0-rc3-00001-gf13ad2db193f #162 [ 20.406689] Hardware name: Renesas Salvator-X board based on r8a7796 (DT) [ 20.413474] task: ffff80063b1f1900 task.stack: ffff80063a71c000 [ 20.419404] PC is at swiotlb_tbl_map_single+0x178/0x2ec [ 20.424625] LR is at map_single+0x4c/0x98 [ 20.428629] pc : [] lr : [] pstate: 800001c5 [ 20.436019] sp : ffff80063a71f9b0 [ 20.439327] x29: ffff80063a71f9b0 x28: ffff80063a20d500 [ 20.444636] x27: ffff000008ed5000 x26: 0000000000000000 [ 20.449944] x25: 000000067abe2adc x24: 0000000000000000 [ 20.455252] x23: 0000000000200000 x22: 0000000000000001 [ 20.460559] x21: 0000000000175ffe x20: ffff80063b2a0010 [ 20.465866] x19: 0000000000000000 x18: 0000ffffcae6fb20 [ 20.471173] x17: 0000ffffa09ba018 x16: ffff0000087c8b70 [ 20.476480] x15: 0000ffffa084f588 x14: 0000ffffa09cfa14 [ 20.481787] x13: 0000ffffcae87ff0 x12: 000000000063abe2 [ 20.487098] x11: ffff000008096360 x10: ffff80063abe2adc [ 20.492407] x9 : 0000000000000000 x8 : 0000000000000000 [ 20.497718] x7 : 0000000000000000 x6 : ffff000008ed50d0 [ 20.503028] x5 : 0000000000000000 x4 : 0000000000000001 [ 20.508338] x3 : 0000000000000000 x2 : 000000067abe2adc [ 20.513648] x1 : 00000000bafff000 x0 : 0000000000000000 [ 20.518958] [ 20.520446] Process mz (pid: 2089, stack limit = 0xffff80063a71c000) [ 20.526798] Stack: (0xffff80063a71f9b0 to 0xffff80063a720000) [ 20.532543] f9a0: ffff80063a71fa30 ffff00000839c680 [ 20.540374] f9c0: ffff80063b2a0010 ffff80063b2a0010 0000000000000001 0000000000000000 [ 20.548204] f9e0: 000000000000006e ffff80063b23c000 ffff80063b23c000 0000000000000000 [ 20.556034] fa00: ffff80063b23c000 ffff80063a20d500 000000013b1f1900 0000000000000000 [ 20.563864] fa20: ffff80063ffd18e0 ffff80063b2a0010 ffff80063a71fa60 ffff00000839cd10 [ 20.571694] fa40: ffff80063b2a0010 0000000000000000 ffff80063ffd18e0 000000067abe2adc [ 20.579524] fa60: ffff80063a71fa90 ffff000008096380 ffff80063b2a0010 0000000000000000 [ 20.587353] fa80: 0000000000000000 0000000000000001 ffff80063a71fac0 ffff00000864f770 [ 20.595184] faa0: ffff80063b23caf0 0000000000000000 0000000000000000 0000000000000140 [ 20.603014] fac0: ffff80063a71fb60 ffff0000087e6498 ffff80063a20d500 ffff80063b23c000 [ 20.610843] fae0: 0000000000000000 ffff000008daeaf0 0000000000000000 ffff000008daeb00 [ 20.618673] fb00: ffff80063a71fc0c ffff000008da7000 ffff80063b23c090 ffff80063a44f000 [ 20.626503] fb20: 0000000000000000 ffff000008daeb00 ffff80063a71fc0c ffff000008da7000 [ 20.634333] fb40: ffff80063b23c090 0000000000000000 ffff800600000037 ffff0000087e63d8 [ 20.642163] fb60: ffff80063a71fbc0 ffff000008807510 ffff80063a692400 ffff80063a20d500 [ 20.649993] fb80: ffff80063a44f000 ffff80063b23c000 ffff80063a69249c 0000000000000000 [ 20.657823] fba0: 0000000000000000 ffff80063a087800 ffff80063b23c000 ffff80063a20d500 [ 20.665653] fbc0: ffff80063a71fc10 ffff0000087e67dc ffff80063a20d500 ffff80063a692400 [ 20.673483] fbe0: ffff80063b23c000 0000000000000000 ffff80063a44f000 ffff80063a69249c [ 20.681312] fc00: ffff80063a5f1a10 000000103a087800 ffff80063a71fc70 ffff0000087e6b24 [ 20.689142] fc20: ffff80063a5f1a80 ffff80063a71fde8 000000000000000f 00000000000005ea [ 20.696972] fc40: ffff80063a5f1a10 0000000000000000 000000000000000f ffff00000887fbd0 [ 20.704802] fc60: fffffff43a5f1a80 0000000000000000 ffff80063a71fc80 ffff000008880240 [ 20.712632] fc80: ffff80063a71fd90 ffff0000087c7a34 ffff80063afc7180 0000000000000000 [ 20.720462] fca0: 0000ffffcae6fe18 0000000000000014 0000000060000000 0000000000000015 [ 20.728292] fcc0: 0000000000000123 00000000000000ce ffff0000088d2000 ffff80063b1f1900 [ 20.736122] fce0: 0000000000008933 ffff000008e7cb80 ffff80063a71fd80 ffff0000087c50a4 [ 20.743951] fd00: 0000000000008933 ffff000008e7cb80 ffff000008e7cb80 000000100000000e [ 20.751781] fd20: ffff80063a71fe4c 0000ffff00000300 0000000000000123 0000000000000000 [ 20.759611] fd40: 0000000000000000 ffff80063b1f0000 000000000000000e 0000000000000300 [ 20.767441] fd60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 20.775271] fd80: 0000000000000000 0000000000000000 ffff80063a71fda0 ffff0000087c8c20 [ 20.783100] fda0: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000 [ 20.790930] fdc0: ffffffffffffffff 0000ffffa0903078 0000000000000000 000000001ea87232 [ 20.798760] fde0: 000000000000000f ffff80063a71fe40 ffff800600000014 ffff000000000001 [ 20.806590] fe00: 0000000000000000 0000000000000000 ffff80063a71fde8 0000000000000000 [ 20.814420] fe20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 [ 20.822249] fe40: 0000000203000011 0000000000000000 0000000000000000 ffff80063a68aa00 [ 20.830079] fe60: ffff80063a68aa00 0000000000000003 0000000000008933 ffff0000081f1b9c [ 20.837909] fe80: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000 [ 20.845739] fea0: ffffffffffffffff 0000ffffa07ca81c 0000000060000000 0000000000000015 [ 20.853569] fec0: 0000000000000003 000000001ea87232 000000000000000f 0000000000000000 [ 20.861399] fee0: 0000ffffcae6fe18 0000000000000014 0000000000000300 0000000000000000 [ 20.869228] ff00: 00000000000000ce 0000000000000000 00000000ffffffff 0000000000000000 [ 20.877059] ff20: 0000000000000002 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588 [ 20.884888] ff40: 0000000000000000 0000ffffa09ba018 0000ffffcae6fb20 000000001ea87010 [ 20.892718] ff60: 0000ffffa09b9000 0000ffffcae6fe30 0000ffffcae6fe18 000000000000000f [ 20.900548] ff80: 0000000000000003 000000001ea87232 0000000000000000 0000000000000000 [ 20.908378] ffa0: 0000000000000000 0000ffffcae6fdc0 0000ffffa09a7824 0000ffffcae6fdc0 [ 20.916208] ffc0: 0000ffffa0903078 0000000060000000 0000000000000003 00000000000000ce [ 20.924038] ffe0: 0000000000000000 0000000000000000 ffffffffffffffff ffffffffffffffff [ 20.931867] Call trace: [ 20.934312] Exception stack(0xffff80063a71f7e0 to 0xffff80063a71f910) [ 20.940750] f7e0: 0000000000000000 0001000000000000 ffff80063a71f9b0 ffff00000839c4c0 [ 20.948580] f800: ffff80063a71f840 ffff00000888a6e4 ffff80063a24c418 ffff80063a24c448 [ 20.956410] f820: 0000000000000000 ffff00000811cd54 ffff80063a71f860 ffff80063a24c458 [ 20.964240] f840: ffff80063a71f870 ffff00000888b258 ffff80063a24c418 0000000000000001 [ 20.972070] f860: ffff80063a71f910 ffff80063a7b7028 ffff80063a71f890 ffff0000088825e4 [ 20.979899] f880: 0000000000000000 00000000bafff000 000000067abe2adc 0000000000000000 [ 20.987729] f8a0: 0000000000000001 0000000000000000 ffff000008ed50d0 0000000000000000 [ 20.995560] f8c0: 0000000000000000 0000000000000000 ffff80063abe2adc ffff000008096360 [ 21.003390] f8e0: 000000000063abe2 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588 [ 21.011219] f900: ffff0000087c8b70 0000ffffa09ba018 [ 21.016097] [] swiotlb_tbl_map_single+0x178/0x2ec [ 21.022362] [] map_single+0x4c/0x98 [ 21.027411] [] swiotlb_map_page+0xa4/0x138 [ 21.033072] [] __swiotlb_map_page+0x20/0x7c [ 21.038821] [] ravb_start_xmit+0x174/0x668 [ 21.044484] [] dev_hard_start_xmit+0x8c/0x120 [ 21.050407] [] sch_direct_xmit+0x108/0x1a0 [ 21.056064] [] __dev_queue_xmit+0x194/0x4cc [ 21.061807] [] dev_queue_xmit+0x10/0x18 [ 21.067214] [] packet_sendmsg+0xf40/0x1220 [ 21.072873] [] sock_sendmsg+0x18/0x2c [ 21.078097] [] SyS_sendto+0xb0/0xf0 [ 21.083150] [] el0_svc_naked+0x24/0x28 [ 21.088462] Code: d34bfef7 2a1803f3 1a9f86d6 35fff878 (d4210000) [ 21.094611] ---[ end trace 5bc544ad491f3814 ]--- [ 21.099234] Kernel panic - not syncing: Fatal exception in interrupt [ 21.105587] Kernel Offset: disabled [ 21.109073] Memory Limit: none [ 21.112126] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Fixes: 2f45d1902acf ("ravb: minimize TX data copying") Signed-off-by: Masaru Nagai Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 5e5ad978eab9..89ac1e3f6175 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1504,6 +1504,19 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) + entry / NUM_TX_DESC * DPTR_ALIGN; len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data; + /* Zero length DMA descriptors are problematic as they seem to + * terminate DMA transfers. Avoid them by simply using a length of + * DPTR_ALIGN (4) when skb data is aligned to DPTR_ALIGN. + * + * As skb is guaranteed to have at least ETH_ZLEN (60) bytes of + * data by the call to skb_put_padto() above this is safe with + * respect to both the length of the first DMA descriptor (len) + * overflowing the available data and the length of the second DMA + * descriptor (skb->len - len) being negative. + */ + if (len == 0) + len = DPTR_ALIGN; + memcpy(buffer, skb->data, len); dma_addr = dma_map_single(ndev->dev.parent, buffer, len, DMA_TO_DEVICE); if (dma_mapping_error(ndev->dev.parent, dma_addr)) From d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Jan 2017 14:20:54 +0100 Subject: [PATCH 241/258] cpmac: remove hopeless #warning The #warning was present 10 years ago when the driver first got merged. As the platform is rather obsolete by now, it seems very unlikely that the warning will cause anyone to fix the code properly. kernelci.org reports the warning for every build in the meantime, so I think it's better to just turn it into a code comment to reduce noise. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 77c88fcf2b86..9b8a30bf939b 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1210,7 +1210,7 @@ int cpmac_init(void) goto fail_alloc; } -#warning FIXME: unhardcode gpio&reset bits + /* FIXME: unhardcode gpio&reset bits */ ar7_gpio_disable(26); ar7_gpio_disable(27); ar7_device_reset(AR7_RESET_BIT_CPMAC_LO); From fe68d8bfe59c561664aa87d827aa4b320eb08895 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 13 Jan 2017 22:38:27 +0100 Subject: [PATCH 242/258] be2net: fix status check in be_cmd_pmac_add() Return value from be_mcc_notify_wait() contains a base completion status together with an additional status. The base_status() macro need to be used to access base status. Fixes: e3a7ae2 be2net: Changing MAC Address of a VF was broken Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 0e74529a4209..30e855004c57 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1118,7 +1118,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, err: mutex_unlock(&adapter->mcc_lock); - if (status == MCC_STATUS_UNAUTHORIZED_REQUEST) + if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; return status; From 6d928ae590c8d58cfd5cca997d54394de139cbb7 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 13 Jan 2017 22:38:28 +0100 Subject: [PATCH 243/258] be2net: don't delete MAC on close on unprivileged BE3 VFs BE3 VFs without FILTMGMT privilege are not allowed to modify its MAC, VLAN table and UC/MC lists. So don't try to delete MAC on such VFs. Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ec010ced6c99..d606e20fe69e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3609,7 +3609,11 @@ static void be_rx_qs_destroy(struct be_adapter *adapter) static void be_disable_if_filters(struct be_adapter *adapter) { - be_dev_mac_del(adapter, adapter->pmac_id[0]); + /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */ + if (!BEx_chip(adapter) || !be_virtfn(adapter) || + check_privilege(adapter, BE_PRIV_FILTMGMT)) + be_dev_mac_del(adapter, adapter->pmac_id[0]); + be_clear_uc_list(adapter); be_clear_mc_list(adapter); From 34393529163af7163ef8459808e3cf2af7db7f16 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 13 Jan 2017 22:38:29 +0100 Subject: [PATCH 244/258] be2net: fix MAC addr setting on privileged BE3 VFs During interface opening MAC address stored in netdev->dev_addr is programmed in the HW with exception of BE3 VFs where the initial MAC is programmed by parent PF. This is OK when MAC address is not changed when an interfaces is down. In this case the requested MAC is stored to netdev->dev_addr and later is stored into HW during opening. But this is not done for all BE3 VFs so the NIC HW does not know anything about this change and all traffic is filtered. This is the case of bonding if fail_over_mac == 0 where the MACs of the slaves are changed while they are down. The be2net behavior is too restrictive because if a BE3 VF has the FILTMGMT privilege then it is able to modify its MAC without any restriction. To solve the described problem the driver should take care about these privileged BE3 VFs so the MAC is programmed during opening. And by contrast unpriviled BE3 VFs should not be allowed to change its MAC in any case. Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index d606e20fe69e..1a7f8ad7b9c6 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -318,6 +318,13 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) if (ether_addr_equal(addr->sa_data, adapter->dev_mac)) return 0; + /* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC + * address + */ + if (BEx_chip(adapter) && be_virtfn(adapter) && + !check_privilege(adapter, BE_PRIV_FILTMGMT)) + return -EPERM; + /* if device is not running, copy MAC to netdev->dev_addr */ if (!netif_running(netdev)) goto done; @@ -3766,8 +3773,9 @@ static int be_enable_if_filters(struct be_adapter *adapter) if (status) return status; - /* For BE3 VFs, the PF programs the initial MAC address */ - if (!(BEx_chip(adapter) && be_virtfn(adapter))) { + /* Don't add MAC on BE3 VFs without FILTMGMT privilege */ + if (!BEx_chip(adapter) || !be_virtfn(adapter) || + check_privilege(adapter, BE_PRIV_FILTMGMT)) { status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; From 1666d49e1d416fcc2cce708242a52fe3317ea8ba Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 12 Jan 2017 21:19:37 +0800 Subject: [PATCH 245/258] mld: do not remove mld souce list info when set link down This is an IPv6 version of commit 24803f38a5c0 ("igmp: do not remove igmp souce list..."). In mld_del_delrec(), we will restore back all source filter info instead of flush them. Move mld_clear_delrec() from ipv6_mc_down() to ipv6_mc_destroy_dev() since we should not remove source list info when set link down. Remove igmp6_group_dropped() in ipv6_mc_destroy_dev() since we have called it in ipv6_mc_down(). Also clear all source info after igmp6_group_dropped() instead of in it because ipv6_mc_down() will call igmp6_group_dropped(). Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 51 ++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 14a3903f1c82..7139fffd61b6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -81,7 +81,7 @@ static void mld_gq_timer_expire(unsigned long data); static void mld_ifc_timer_expire(unsigned long data); static void mld_ifc_event(struct inet6_dev *idev); static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); -static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr); +static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); static void mld_clear_delrec(struct inet6_dev *idev); static bool mld_in_v1_mode(const struct inet6_dev *idev); static int sf_setstate(struct ifmcaddr6 *pmc); @@ -692,9 +692,9 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) dev_mc_del(dev, buf); } - if (mc->mca_flags & MAF_NOREPORT) - goto done; spin_unlock_bh(&mc->mca_lock); + if (mc->mca_flags & MAF_NOREPORT) + return; if (!mc->idev->dead) igmp6_leave_group(mc); @@ -702,8 +702,6 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) spin_lock_bh(&mc->mca_lock); if (del_timer(&mc->mca_timer)) atomic_dec(&mc->mca_refcnt); -done: - ip6_mc_clear_src(mc); spin_unlock_bh(&mc->mca_lock); } @@ -748,10 +746,11 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) spin_unlock_bh(&idev->mc_lock); } -static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) +static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ifmcaddr6 *pmc, *pmc_prev; - struct ip6_sf_list *psf, *psf_next; + struct ip6_sf_list *psf; + struct in6_addr *pmca = &im->mca_addr; spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; @@ -768,14 +767,20 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) } spin_unlock_bh(&idev->mc_lock); + spin_lock_bh(&im->mca_lock); if (pmc) { - for (psf = pmc->mca_tomb; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); + im->idev = pmc->idev; + im->mca_crcount = idev->mc_qrv; + im->mca_sfmode = pmc->mca_sfmode; + if (pmc->mca_sfmode == MCAST_INCLUDE) { + im->mca_tomb = pmc->mca_tomb; + im->mca_sources = pmc->mca_sources; + for (psf = im->mca_sources; psf; psf = psf->sf_next) + psf->sf_crcount = im->mca_crcount; } in6_dev_put(pmc->idev); - kfree(pmc); } + spin_unlock_bh(&im->mca_lock); } static void mld_clear_delrec(struct inet6_dev *idev) @@ -904,7 +909,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) mca_get(mc); write_unlock_bh(&idev->lock); - mld_del_delrec(idev, &mc->mca_addr); + mld_del_delrec(idev, mc); igmp6_group_added(mc); ma_put(mc); return 0; @@ -927,6 +932,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_unlock_bh(&idev->lock); igmp6_group_dropped(ma); + ip6_mc_clear_src(ma); ma_put(ma); return 0; @@ -2501,15 +2507,17 @@ void ipv6_mc_down(struct inet6_dev *idev) /* Withdraw multicast list */ read_lock_bh(&idev->lock); - mld_ifc_stop_timer(idev); - mld_gq_stop_timer(idev); - mld_dad_stop_timer(idev); for (i = idev->mc_list; i; i = i->next) igmp6_group_dropped(i); - read_unlock_bh(&idev->lock); - mld_clear_delrec(idev); + /* Should stop timer after group drop. or we will + * start timer again in mld_ifc_event() + */ + mld_ifc_stop_timer(idev); + mld_gq_stop_timer(idev); + mld_dad_stop_timer(idev); + read_unlock_bh(&idev->lock); } static void ipv6_mc_reset(struct inet6_dev *idev) @@ -2531,8 +2539,10 @@ void ipv6_mc_up(struct inet6_dev *idev) read_lock_bh(&idev->lock); ipv6_mc_reset(idev); - for (i = idev->mc_list; i; i = i->next) + for (i = idev->mc_list; i; i = i->next) { + mld_del_delrec(idev, i); igmp6_group_added(i); + } read_unlock_bh(&idev->lock); } @@ -2565,6 +2575,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) /* Deactivate timers */ ipv6_mc_down(idev); + mld_clear_delrec(idev); /* Delete all-nodes address. */ /* We cannot call ipv6_dev_mc_dec() directly, our caller in @@ -2579,11 +2590,9 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while ((i = idev->mc_list) != NULL) { idev->mc_list = i->next; + write_unlock_bh(&idev->lock); - - igmp6_group_dropped(i); ma_put(i); - write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); From 02ca0423fd65a0a9c4d70da0dbb8f4b8503f08c7 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 13 Jan 2017 10:12:20 +0100 Subject: [PATCH 246/258] ip6_tunnel: Account for tunnel header in tunnel MTU With ip6gre we have a tunnel header which also makes the tunnel MTU smaller. We need to reserve room for it. Previously we were using up space reserved for the Tunnel Encapsulation Limit option header (RFC 2473). Also, after commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") our contract with the caller has changed. Now we check if the packet length exceeds the tunnel MTU after the tunnel header has been pushed, unlike before. This is reflected in the check where we look at the packet length minus the size of the tunnel header, which is already accounted for in tunnel MTU. Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 36d292180942..753d6d0860fb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1108,7 +1108,7 @@ route_lookup: t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - psh_hlen; + mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1117,7 +1117,7 @@ route_lookup: mtu = IPV6_MIN_MTU; if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; From 34c55cf2fc75f8bf6ba87df321038c064cf2d426 Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Fri, 13 Jan 2017 09:32:34 -0500 Subject: [PATCH 247/258] net: phy: dp83867: allow RGMII_TXID/RGMII_RXID interface types Currently dp83867 driver returns error if phy interface type PHY_INTERFACE_MODE_RGMII_RXID is used to set the rx only internal delay. Similarly issue happens for PHY_INTERFACE_MODE_RGMII_TXID. Fix this by checking also the interface type if a particular delay value is missing in the phy dt bindings. Also update the DT document accordingly. Signed-off-by: Murali Karicheri Signed-off-by: Sekhar Nori Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ti,dp83867.txt | 6 ++++-- drivers/net/phy/dp83867.c | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt index 85bf945b898f..afe9630a5e7d 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83867.txt +++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt @@ -3,9 +3,11 @@ Required properties: - reg - The ID number for the phy, usually a small integer - ti,rx-internal-delay - RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h - for applicable values + for applicable values. Required only if interface type is + PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID - ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h - for applicable values + for applicable values. Required only if interface type is + PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID - ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h for applicable values diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index e84ae084e259..ca1b462bf7b2 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -132,12 +132,16 @@ static int dp83867_of_init(struct phy_device *phydev) ret = of_property_read_u32(of_node, "ti,rx-internal-delay", &dp83867->rx_id_delay); - if (ret) + if (ret && + (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)) return ret; ret = of_property_read_u32(of_node, "ti,tx-internal-delay", &dp83867->tx_id_delay); - if (ret) + if (ret && + (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)) return ret; return of_property_read_u32(of_node, "ti,fifo-depth", From 57d5f64d83ab5b5a5118b1597386dd76eaf4340d Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Fri, 13 Jan 2017 15:46:25 +0100 Subject: [PATCH 248/258] tipc: allocate user memory with GFP_KERNEL flag Until now, we allocate memory always with GFP_ATOMIC flag. When the system is under memory pressure and a user tries to send, the send fails due to low memory. However, the user application can wait for free memory if we allocate it using GFP_KERNEL flag. In this commit, we use allocate memory with GFP_KERNEL for all user allocation. Reported-by: Rune Torgersen Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/discover.c | 4 ++-- net/tipc/link.c | 2 +- net/tipc/msg.c | 16 ++++++++-------- net/tipc/msg.h | 2 +- net/tipc/name_distr.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 6b109a808d4c..02462d67d191 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rskb = tipc_buf_acquire(MAX_H_SIZE); + rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!rskb) return; tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); @@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return -ENOMEM; - req->buf = tipc_buf_acquire(MAX_H_SIZE); + req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!req->buf) { kfree(req); return -ENOMEM; diff --git a/net/tipc/link.c b/net/tipc/link.c index bda89bf9f4ff..4e8647aef01c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1395,7 +1395,7 @@ tnl: msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); - tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE); + tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { pr_warn("%sunable to send packet\n", link_co_err); return; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a22be502f1bd..ab02d0742476 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -58,12 +58,12 @@ static unsigned int align(unsigned int i) * NOTE: Headroom is reserved to allow prepending of a data link header. * There may also be unrequested tailroom present at the buffer's end. */ -struct sk_buff *tipc_buf_acquire(u32 size) +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { struct sk_buff *skb; unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + skb = alloc_skb_fclone(buf_size, gfp); if (skb) { skb_reserve(skb, BUF_HEADROOM); skb_put(skb, size); @@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type, struct tipc_msg *msg; struct sk_buff *buf; - buf = tipc_buf_acquire(hdr_sz + data_sz); + buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC); if (unlikely(!buf)) return NULL; @@ -261,7 +261,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, /* No fragmentation needed? */ if (likely(msz <= pktmax)) { - skb = tipc_buf_acquire(msz); + skb = tipc_buf_acquire(msz, GFP_KERNEL); if (unlikely(!skb)) return -ENOMEM; skb_orphan(skb); @@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, msg_set_importance(&pkthdr, msg_importance(mhdr)); /* Prepare first fragment */ - skb = tipc_buf_acquire(pktmax); + skb = tipc_buf_acquire(pktmax, GFP_KERNEL); if (!skb) return -ENOMEM; skb_orphan(skb); @@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, pktsz = drem + INT_H_SIZE; else pktsz = pktmax; - skb = tipc_buf_acquire(pktsz); + skb = tipc_buf_acquire(pktsz, GFP_KERNEL); if (!skb) { rc = -ENOMEM; goto error; @@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, if (msz > (max / 2)) return false; - _skb = tipc_buf_acquire(max); + _skb = tipc_buf_acquire(max, GFP_ATOMIC); if (!_skb) return false; @@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) /* Never return SHORT header; expand by replacing buffer if necessary */ if (msg_short(hdr)) { - *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen); + *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC); if (!*skb) goto exit; memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 8d408612ffa4..2c3dc38abf9c 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -820,7 +820,7 @@ static inline bool msg_is_reset(struct tipc_msg *hdr) return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); } -struct sk_buff *tipc_buf_acquire(u32 size); +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c1cfd92de17a..23f8899e0f8c 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -69,7 +69,7 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); struct tipc_msg *msg; if (buf != NULL) { From f1f7714ea51c56b7163fb1a5acf39c6a204dd758 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Jan 2017 23:38:15 +0100 Subject: [PATCH 249/258] bpf: rework prog_digest into prog_tag Commit 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink") was recently discussed, partially due to admittedly suboptimal name of "prog_digest" in combination with sha1 hash usage, thus inevitably and rightfully concerns about its security in terms of collision resistance were raised with regards to use-cases. The intended use cases are for debugging resp. introspection only for providing a stable "tag" over the instruction sequence that both kernel and user space can calculate independently. It's not usable at all for making a security relevant decision. So collisions where two different instruction sequences generate the same tag can happen, but ideally at a rather low rate. The "tag" will be dumped in hex and is short enough to introspect in tracepoints or kallsyms output along with other data such as stack trace, etc. Thus, this patch performs a rename into prog_tag and truncates the tag to a short output (64 bits) to make it obvious it's not collision-free. Should in future a hash or facility be needed with a security relevant focus, then we can think about requirements, constraints, etc that would fit to that situation. For now, rework the exposed parts for the current use cases as long as nothing has been released yet. Tested on x86_64 and s390x. Fixes: 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- include/linux/filter.h | 6 ++++-- include/uapi/linux/pkt_cls.h | 2 +- include/uapi/linux/tc_act/tc_bpf.h | 2 +- kernel/bpf/core.c | 14 ++++++++------ kernel/bpf/syscall.c | 8 ++++---- kernel/bpf/verifier.c | 2 +- net/sched/act_bpf.c | 5 ++--- net/sched/cls_bpf.c | 4 ++-- 9 files changed, 24 insertions(+), 21 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f74ae68086dc..05cf951df3fe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -216,7 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); -int bpf_prog_calc_digest(struct bpf_prog *fp); +int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index a0934e6c9bab..e4eb2546339a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -57,6 +57,8 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +#define BPF_TAG_SIZE 8 + /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -408,7 +410,7 @@ struct bpf_prog { kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ - u32 digest[SHA_DIGEST_WORDS]; /* Program digest */ + u8 tag[BPF_TAG_SIZE]; struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const void *ctx, @@ -519,7 +521,7 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) return prog->len * sizeof(struct bpf_insn); } -static inline u32 bpf_prog_digest_scratch_size(const struct bpf_prog *prog) +static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) { return round_up(bpf_prog_insn_size(prog) + sizeof(__be64) + 1, SHA_MESSAGE_BYTES); diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index cb4bcdc58543..a4dcd88ec271 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -397,7 +397,7 @@ enum { TCA_BPF_NAME, TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, - TCA_BPF_DIGEST, + TCA_BPF_TAG, __TCA_BPF_MAX, }; diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index a6b88a6f7f71..975b50dc8d1d 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -27,7 +27,7 @@ enum { TCA_ACT_BPF_FD, TCA_ACT_BPF_NAME, TCA_ACT_BPF_PAD, - TCA_ACT_BPF_DIGEST, + TCA_ACT_BPF_TAG, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1eb4f1303756..503d4211988a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -146,10 +146,11 @@ void __bpf_prog_free(struct bpf_prog *fp) vfree(fp); } -int bpf_prog_calc_digest(struct bpf_prog *fp) +int bpf_prog_calc_tag(struct bpf_prog *fp) { const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); - u32 raw_size = bpf_prog_digest_scratch_size(fp); + u32 raw_size = bpf_prog_tag_scratch_size(fp); + u32 digest[SHA_DIGEST_WORDS]; u32 ws[SHA_WORKSPACE_WORDS]; u32 i, bsize, psize, blocks; struct bpf_insn *dst; @@ -162,7 +163,7 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) if (!raw) return -ENOMEM; - sha_init(fp->digest); + sha_init(digest); memset(ws, 0, sizeof(ws)); /* We need to take out the map fd for the digest calculation @@ -204,13 +205,14 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) *bits = cpu_to_be64((psize - 1) << 3); while (blocks--) { - sha_transform(fp->digest, todo, ws); + sha_transform(digest, todo, ws); todo += SHA_MESSAGE_BYTES; } - result = (__force __be32 *)fp->digest; + result = (__force __be32 *)digest; for (i = 0; i < SHA_DIGEST_WORDS; i++) - result[i] = cpu_to_be32(fp->digest[i]); + result[i] = cpu_to_be32(digest[i]); + memcpy(fp->tag, result, sizeof(fp->tag)); vfree(raw); return 0; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e89acea22ecf..1d6b29e4e2c3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -688,17 +688,17 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; - char prog_digest[sizeof(prog->digest) * 2 + 1] = { }; + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - bin2hex(prog_digest, prog->digest, sizeof(prog->digest)); + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "prog_type:\t%u\n" "prog_jited:\t%u\n" - "prog_digest:\t%s\n" + "prog_tag:\t%s\n" "memlock:\t%llu\n", prog->type, prog->jited, - prog_digest, + prog_tag, prog->pages * 1ULL << PAGE_SHIFT); } #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 83ed2f8f6f22..cdc43b899f28 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2936,7 +2936,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) int insn_cnt = env->prog->len; int i, j, err; - err = bpf_prog_calc_digest(env->prog); + err = bpf_prog_calc_tag(env->prog); if (err) return err; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1c60317f0121..520baa41cba3 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -123,12 +123,11 @@ static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; - nla = nla_reserve(skb, TCA_ACT_BPF_DIGEST, - sizeof(prog->filter->digest)); + nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; - memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index adc776048d1a..d9c97018317d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -555,11 +555,11 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; - nla = nla_reserve(skb, TCA_BPF_DIGEST, sizeof(prog->filter->digest)); + nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; - memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } From 8a367e74c0120ef68c8c70d5a025648c96626dff Mon Sep 17 00:00:00 2001 From: Basil Gunn Date: Sat, 14 Jan 2017 12:18:55 -0800 Subject: [PATCH 250/258] ax25: Fix segfault after sock connection timeout The ax.25 socket connection timed out & the sock struct has been previously taken down ie. sock struct is now a NULL pointer. Checking the sock_flag causes the segfault. Check if the socket struct pointer is NULL before checking sock_flag. This segfault is seen in timed out netrom connections. Please submit to -stable. Signed-off-by: Basil Gunn Signed-off-by: David S. Miller --- net/ax25/ax25_subr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 4855d18a8511..038b109b2be7 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!sock_flag(ax25->sk, SOCK_DESTROY)) + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); From abeffce90c7f6ce74de9794ad0977a168edf8ef6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 15 Jan 2017 19:50:46 +0200 Subject: [PATCH 251/258] net/mlx5e: Fix a -Wmaybe-uninitialized warning As found by Olof's build bot, we gain a harmless warning about a potential uninitialized variable reference in mlx5: drivers/net/ethernet/mellanox/mlx5/core/en_tc.c: In function 'parse_tc_fdb_actions': drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:769:13: warning: 'out_dev' may be used uninitialized in this function [-Wmaybe-uninitialized] drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:811:21: note: 'out_dev' was declared here This was introduced through the addition of an 'IS_ERR/PTR_ERR' pair that gcc is unfortunately unable to completely figure out. The problem being gcc cannot tell that if(IS_ERR()) in mlx5e_route_lookup_ipv4() is equivalent to checking if(err) later, so it assumes that 'out_dev' is used after the 'return PTR_ERR(rt)'. The PTR_ERR_OR_ZERO() case by comparison is fairly easy to detect by gcc, so it can't get that wrong, so it no longer warns. Hadar Hen Zion already attempted to fix the warning earlier by adding fake initializations, but that ended up not fully addressing all warnings, so I'm reverting it now that it is no longer needed. Link: http://arm-soc.lixom.net/buildlogs/mainline/v4.10-rc3-98-gcff3b2c/ Fixes: a42485eb0ee4 ("net/mlx5e: TC ipv4 tunnel encap offload error flow fixes") Fixes: a757d108dc1a ("net/mlx5e: Fix kbuild warnings for uninitialized parameters") Signed-off-by: Arnd Bergmann Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 118cea5e5489..46bef6a26a8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -668,9 +668,12 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, int ttl; #if IS_ENABLED(CONFIG_INET) + int ret; + rt = ip_route_output_key(dev_net(mirred_dev), fl4); - if (IS_ERR(rt)) - return PTR_ERR(rt); + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; #else return -EOPNOTSUPP; #endif @@ -741,8 +744,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct flowi4 fl4 = {}; char *encap_header; int encap_size; - __be32 saddr = 0; - int ttl = 0; + __be32 saddr; + int ttl; int err; encap_header = kzalloc(max_encap_size, GFP_KERNEL); From b618ab4561d40664492cf9f9507f19a1c8272970 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 15 Jan 2017 19:19:00 +0100 Subject: [PATCH 252/258] net: stmmac: don't use netdev_[dbg, info, ..] before net_device is registered Don't use netdev_info and friends before the net_device is registered. This avoids ugly messages like "meson8b-dwmac c9410000.ethernet (unnamed net_device) (uninitialized): Enable RX Mitigation via HW Watchdog Timer" Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a276a32d57f2..e3f6389e1b01 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3326,9 +3326,9 @@ int stmmac_dvr_probe(struct device *device, (priv->plat->maxmtu >= ndev->min_mtu)) ndev->max_mtu = priv->plat->maxmtu; else if (priv->plat->maxmtu < ndev->min_mtu) - netdev_warn(priv->dev, - "%s: warning: maxmtu having invalid value (%d)\n", - __func__, priv->plat->maxmtu); + dev_warn(priv->device, + "%s: warning: maxmtu having invalid value (%d)\n", + __func__, priv->plat->maxmtu); if (flow_ctrl) priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */ @@ -3340,7 +3340,8 @@ int stmmac_dvr_probe(struct device *device, */ if ((priv->synopsys_id >= DWMAC_CORE_3_50) && (!priv->plat->riwt_off)) { priv->use_riwt = 1; - netdev_info(priv->dev, "Enable RX Mitigation via HW Watchdog Timer\n"); + dev_info(priv->device, + "Enable RX Mitigation via HW Watchdog Timer\n"); } netif_napi_add(ndev, &priv->napi, stmmac_poll, 64); @@ -3366,17 +3367,17 @@ int stmmac_dvr_probe(struct device *device, /* MDIO bus Registration */ ret = stmmac_mdio_register(ndev); if (ret < 0) { - netdev_err(priv->dev, - "%s: MDIO bus (id: %d) registration failed", - __func__, priv->plat->bus_id); + dev_err(priv->device, + "%s: MDIO bus (id: %d) registration failed", + __func__, priv->plat->bus_id); goto error_mdio_register; } } ret = register_netdev(ndev); if (ret) { - netdev_err(priv->dev, "%s: ERROR %i registering the device\n", - __func__, ret); + dev_err(priv->device, "%s: ERROR %i registering the device\n", + __func__, ret); goto error_netdev_register; } From 291c566a28910614ce42d0ffe82196eddd6346f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 16 Jan 2017 18:31:37 +0200 Subject: [PATCH 253/258] net/mlx4_core: Fix racy CQ (Completion Queue) free In function mlx4_cq_completion() and mlx4_cq_event(), the radix_tree_lookup requires a rcu_read_lock. This is mandatory: if another core frees the CQ, it could run the radix_tree_node_rcu_free() call_rcu() callback while its being used by the radix tree lookup function. Additionally, in function mlx4_cq_event(), since we are adding the rcu lock around the radix-tree lookup, we no longer need to take the spinlock. Also, the synchronize_irq() call for the async event eliminates the need for incrementing the cq reference count in mlx4_cq_event(). Other changes: 1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock: we no longer take this spinlock in the interrupt context. The spinlock here, therefore, simply protects against different threads simultaneously invoking mlx4_cq_free() for different cq's. 2. In function mlx4_cq_free(), we move the radix tree delete to before the synchronize_irq() calls. This guarantees that we will not access this cq during any subsequent interrupts, and therefore can safely free the CQ after the synchronize_irq calls. The rcu_read_lock in the interrupt handlers only needs to protect against corrupting the radix tree; the interrupt handlers may access the cq outside the rcu_read_lock due to the synchronize_irq calls which protect against premature freeing of the cq. 3. In function mlx4_cq_event(), we change the mlx_warn message to mlx4_dbg. 4. We leave the cq reference count mechanism in place, because it is still needed for the cq completion tasklet mechanism. Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ") Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cq.c | 38 +++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index a849da92f857..6b8635378f1f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) { struct mlx4_cq *cq; + rcu_read_lock(); cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cqn & (dev->caps.num_cqs - 1)); + rcu_read_unlock(); + if (!cq) { mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ ++cq->arm_sn; cq->comp(cq); @@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; - spin_lock(&cq_table->lock); - + rcu_read_lock(); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); - - spin_unlock(&cq_table->lock); + rcu_read_unlock(); if (!cq) { - mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); + mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ cq->event(cq, event_type); - - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, @@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (err) return err; - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); if (err) goto err_icm; @@ -349,9 +351,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, return 0; err_radix: - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); err_icm: mlx4_cq_free_icm(dev, cq->cqn); @@ -370,15 +372,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); + spin_lock(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock(&cq_table->lock); + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq != priv->eq_table.eq[MLX4_EQ_ASYNC].irq) synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); - if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); From 7c3945bc2073554bb2ecf983e073dee686679c53 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 16 Jan 2017 18:31:38 +0200 Subject: [PATCH 254/258] net/mlx4_core: Fix when to save some qp context flags for dynamic VST to VGT transitions Save the qp context flags byte containing the flag disabling vlan stripping in the RESET to INIT qp transition, rather than in the INIT to RTR transition. Per the firmware spec, the flags in this byte are active in the RESET to INIT transition. As a result of saving the flags in the incorrect qp transition, when switching dynamically from VGT to VST and back to VGT, the vlan remained stripped (as is required for VST) and did not return to not-stripped (as is required for VGT). Fixes: f0f829bf42cd ("net/mlx4_core: Add immediate activate for VGT->VST->VGT") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 56185a0b827d..1822382212ee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2980,6 +2980,9 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, put_res(dev, slave, srqn, RES_SRQ); qp->srq = srq; } + + /* Save param3 for dynamic changes from VST back to VGT */ + qp->param3 = qpc->param3; put_res(dev, slave, rcqn, RES_CQ); put_res(dev, slave, mtt_base, RES_MTT); res_end_move(dev, slave, RES_QP, qpn); @@ -3772,7 +3775,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; u8 orig_sched_queue; - __be32 orig_param3 = qpc->param3; u8 orig_vlan_control = qpc->pri_path.vlan_control; u8 orig_fvl_rx = qpc->pri_path.fvl_rx; u8 orig_pri_path_fl = qpc->pri_path.fl; @@ -3814,7 +3816,6 @@ out: */ if (!err) { qp->sched_queue = orig_sched_queue; - qp->param3 = orig_param3; qp->vlan_control = orig_vlan_control; qp->fvl_rx = orig_fvl_rx; qp->pri_path_fl = orig_pri_path_fl; From 9577b174cd0323d287c994ef0891db71666d0765 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 16 Jan 2017 18:31:39 +0200 Subject: [PATCH 255/258] net/mlx4_core: Eliminate warning messages for SRQ_LIMIT under SRIOV When running SRIOV, warnings for SRQ LIMIT events flood the Hypervisor's message log when (correct, normally operating) apps use SRQ LIMIT events as a trigger to post WQEs to SRQs. Add more information to the existing debug printout for SRQ_LIMIT, and output the warning messages only for the SRQ CATAS ERROR event. Fixes: acba2420f9d2 ("mlx4_core: Add wrapper functions and comm channel and slave event support to EQs") Fixes: e0debf9cb50d ("mlx4_core: Reduce warning message for SRQ_LIMIT event to debug level") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index cd3638e6fe25..0509996957d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -554,8 +554,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; case MLX4_EVENT_TYPE_SRQ_LIMIT: - mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", - __func__); + mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n", + __func__, be32_to_cpu(eqe->event.srq.srqn), + eq->eqn); case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ @@ -570,15 +571,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq->eqn, eq->cons_index, ret); break; } - mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", - __func__, slave, - be32_to_cpu(eqe->event.srq.srqn), - eqe->type, eqe->subtype); + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", + __func__, slave, + be32_to_cpu(eqe->event.srq.srqn), + eqe->type, eqe->subtype); if (!ret && slave != dev->caps.function) { - mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", - __func__, eqe->type, - eqe->subtype, slave); + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", + __func__, eqe->type, + eqe->subtype, slave); mlx4_slave_event(dev, slave, eqe); break; } From 0faa9cb5b3836a979864a6357e01d2046884ad52 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 15 Jan 2017 10:14:06 -0500 Subject: [PATCH 256/258] net sched actions: fix refcnt when GETing of action after bind Demonstrating the issue: .. add a drop action $sudo $TC actions add action drop index 10 .. retrieve it $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 0 installed 29 sec used 29 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... bug 1 above: reference is two. Reference is actually 1 but we forget to subtract 1. ... do a GET again and we see the same issue try a few times and nothing changes ~$ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 0 installed 31 sec used 31 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... lets try to bind the action to a filter.. $ sudo $TC qdisc add dev lo ingress $ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 ... and now a few GETs: $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 3 bind 1 installed 204 sec used 204 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 4 bind 1 installed 206 sec used 206 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 5 bind 1 installed 235 sec used 235 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 .... as can be observed the reference count keeps going up. After the fix $ sudo $TC actions add action drop index 10 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 1 bind 0 installed 4 sec used 4 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 1 bind 0 installed 6 sec used 6 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC qdisc add dev lo ingress $ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 1 installed 32 sec used 32 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 1 installed 33 sec used 33 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Fixes: aecc5cefc389 ("net sched actions: fix GETing actions") Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2095c83ce773..e10456ef6f7a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -900,8 +900,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; - if (event == RTM_GETACTION) - act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } @@ -914,7 +912,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, return ret; } err: - tcf_action_destroy(&actions, 0); + if (event != RTM_GETACTION) + tcf_action_destroy(&actions, 0); return ret; } From 501db511397fd6efff3aa5b4e8de415b55559550 Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Tue, 17 Jan 2017 18:13:51 +0000 Subject: [PATCH 257/258] virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a subtle change in how the virtio_net flags are derived from the SKBs ip_summed field. With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to ip_summed == CHECKSUM_NONE, which should be the same. Further, the virtio spec 1.0 / CS04 explicitly says that VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver. Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb") Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion") Signed-off-by: Rolf Neugebauer Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 66204007d7ac..56436472ccc7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); - } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; From a249708bc2aa1fe3ddf15dfac22bee519d15996b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 17 Jan 2017 12:23:21 +0100 Subject: [PATCH 258/258] stmmac: add missing of_node_put The function stmmac_dt_phy provides several possibilities for initializing plat->mdio_node, all of which have the effect of increasing the reference count of the assigned value. This field is not updated elsewhere, so the value is live until the end of the lifetime of plat (devm_allocated), just after the end of stmmac_remove_config_dt. Thus, add an of_node_put on plat->mdio_node in stmmac_remove_config_dt. It is possible that the field mdio_node is never initialized, but of_node_put is NULL-safe, so it is also safe to call of_node_put in that case. Signed-off-by: Julia Lawall Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 082cd48db6a7..36942f5a6a53 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -351,6 +351,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev, if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(plat->phy_node); + of_node_put(plat->mdio_node); } #else struct plat_stmmacenet_data *