From 6fce983f9b3ef51d47e647b2cff15049ef803781 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Tue, 13 Dec 2016 11:03:49 +0000
Subject: [PATCH 001/258] ASoC: dwc: Fix PIO mode initialization

We can no longer rely on the return value of
devm_snd_dmaengine_pcm_register(...) to check if the DMA
handle is declared in the DT.

Previously this check activated PIO mode but currently
dma_request_chan returns either a valid channel or -EPROBE_DEFER.

In order to activate PIO mode check instead if the interrupt
line is declared. This reflects better what is documented in
the DT bindings (see Documentation/devicetree/bindings/sound/
designware-i2s.txt).

Also, initialize use_pio variable which was never being set
causing PIO mode to never work.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/dwc/designware_i2s.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c
index 2998954a1c74..bdf8398cbc81 100644
--- a/sound/soc/dwc/designware_i2s.c
+++ b/sound/soc/dwc/designware_i2s.c
@@ -681,22 +681,19 @@ static int dw_i2s_probe(struct platform_device *pdev)
 	}
 
 	if (!pdata) {
-		ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
-		if (ret == -EPROBE_DEFER) {
-			dev_err(&pdev->dev,
-				"failed to register PCM, deferring probe\n");
-			return ret;
-		} else if (ret) {
-			dev_err(&pdev->dev,
-				"Could not register DMA PCM: %d\n"
-				"falling back to PIO mode\n", ret);
+		if (irq >= 0) {
 			ret = dw_pcm_register(pdev);
-			if (ret) {
-				dev_err(&pdev->dev,
-					"Could not register PIO PCM: %d\n",
+			dev->use_pio = true;
+		} else {
+			ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL,
+					0);
+			dev->use_pio = false;
+		}
+
+		if (ret) {
+			dev_err(&pdev->dev, "could not register pcm: %d\n",
 					ret);
-				goto err_clk_disable;
-			}
+			goto err_clk_disable;
 		}
 	}
 

From 0ea617a298dcdc2251b4e10f83ac3f3e627b66e3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 8 Dec 2016 13:05:43 +0000
Subject: [PATCH 002/258] ASoC: rsnd: don't double free kctrl

On an error, snd_ctl_add already free's kctrl, so calling snd_ctl_free_one
to free it again leads to a double free error.  Fix this by removing
the extraneous snd_ctl_free_one call.

Issue found using static analysis with CoverityScan, CID 1372908

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sh/rcar/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index 4bd68de76130..99b5b0835c1e 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -1030,10 +1030,8 @@ static int __rsnd_kctrl_new(struct rsnd_mod *mod,
 		return -ENOMEM;
 
 	ret = snd_ctl_add(card, kctrl);
-	if (ret < 0) {
-		snd_ctl_free_one(kctrl);
+	if (ret < 0)
 		return ret;
-	}
 
 	cfg->update = update;
 	cfg->card = card;

From c2b36129ce53a22b89dd2b88db33e7ffdefe0f41 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Dec 2016 14:17:47 +0000
Subject: [PATCH 003/258] ASoC: topology: kfree kcontrol->private_value before
 freeing kcontrol

kcontrol->private_value is being kfree'd after kcontrol has been freed
(in previous call to snd_ctl_remove).  Instead, fix this by kfreeing
the private_value before kcontrol.

CoverityScan CID#1388311 "Read from pointer after free"

Fixes: eea3dd4f1247a ("ASoC: topology: Only free TLV for volume mixers of a widget")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-topology.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 65670b2b408c..fbfb1fab88d5 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -514,13 +514,12 @@ static void remove_widget(struct snd_soc_component *comp,
 			    == SND_SOC_TPLG_TYPE_MIXER)
 				kfree(kcontrol->tlv.p);
 
-			snd_ctl_remove(card, kcontrol);
-
 			/* Private value is used as struct soc_mixer_control
 			 * for volume mixers or soc_bytes_ext for bytes
 			 * controls.
 			 */
 			kfree((void *)kcontrol->private_value);
+			snd_ctl_remove(card, kcontrol);
 		}
 		kfree(w->kcontrol_news);
 	}

From 9e4d59ada4d602e78eee9fb5f898ce61fdddb446 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Fri, 16 Dec 2016 18:26:54 +0900
Subject: [PATCH 004/258] ASoC: hdmi-codec: use unsigned type to structure
 members with bit-field

This is a fix for Linux 4.10-rc1.

In C language specification, a bit-field is interpreted as a signed or
unsigned integer type consisting of the specified number of bits.

In GCC manual, the range of a signed bit field of N bits is from
-(2^N) / 2 to ((2^N) / 2) - 1
https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Bit-Fields

Therefore, when defined as 1 bit-field with signed type, variables can
represents -1 and 0.

The snd-soc-hdmi-codec module includes a structure which has signed type
members with bit-fields. Codes of this module assign 0 and 1 to the
members. This seems to result in implementation-dependent behaviours.

As of v4.10-rc1 merge window, outside of sound subsystem, this structure
is referred by below GPU modules.
 - tda998x
 - sti-drm
 - mediatek-drm-hdmi
 - msm

As long as I review their codes relevant to the structure, the structure
members are used just for condition statements and printk formats.
My proposal of change is a bit intrusive to the printk formats but this
may be acceptable.

Totally, it's reasonable to use unsigned type for the structure members.
This bug is detected by Sparse, static code analyzer with below warnings.

./include/sound/hdmi-codec.h:39:26: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:40:28: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:41:29: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:42:31: error: dubious one-bit signed bitfield

Fixes: 09184118a8ab ("ASoC: hdmi-codec: Add hdmi-codec for external HDMI-encoders")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Acked-by: Arnaud Pouliquen <arnaud.pouliquen@st.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
CC: stable@vger.kernel.org
---
 include/sound/hdmi-codec.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h
index 530c57bdefa0..915c4357945c 100644
--- a/include/sound/hdmi-codec.h
+++ b/include/sound/hdmi-codec.h
@@ -36,10 +36,10 @@ struct hdmi_codec_daifmt {
 		HDMI_AC97,
 		HDMI_SPDIF,
 	} fmt;
-	int bit_clk_inv:1;
-	int frame_clk_inv:1;
-	int bit_clk_master:1;
-	int frame_clk_master:1;
+	unsigned int bit_clk_inv:1;
+	unsigned int frame_clk_inv:1;
+	unsigned int bit_clk_master:1;
+	unsigned int frame_clk_master:1;
 };
 
 /*

From 65dadffddbe44a60f8be9e95f264949ba1e547e9 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Fri, 16 Dec 2016 14:43:39 -0800
Subject: [PATCH 005/258] Input: joydev - remove unused linux/miscdevice.h
 include

This patch remove the inclusion of linux/miscdevice.h for joydev
since it does not use miscdevice.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joydev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index f3135ae22df4..abd18f31b24f 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -22,7 +22,6 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/poll.h>
 #include <linux/init.h>

From 41c567a5d7d1a986763e58c3394782813c3bcb03 Mon Sep 17 00:00:00 2001
From: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Date: Sun, 18 Dec 2016 15:26:12 -0800
Subject: [PATCH 006/258] Input: i8042 - add Pegatron touchpad to noloop table

Avoid AUX loopback in Pegatron C15B touchpad, so input subsystem is able
to recognize a Synaptics touchpad in the AUX port.

Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=93791
(Touchpad is not detected on DNS 0801480 notebook (PEGATRON C15B))

Suggested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-x86ia64io.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 73a4e68448fc..381d802fe853 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -211,6 +211,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"),
 		},
 	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "C15B"),
+		},
+	},
 	{ }
 };
 

From 67626c9323027534496d21cf32793121662d03c0 Mon Sep 17 00:00:00 2001
From: Aniroop Mathur <a.mathur@samsung.com>
Date: Sun, 18 Dec 2016 15:27:16 -0800
Subject: [PATCH 007/258] Input: synaptics_i2c - change msleep to usleep_range
 for small msecs

msleep(1~20) may not do what the caller intends, and will often sleep longer.
(~20 ms actual sleep for any value given in the 1~20ms range)
This is not the desired behaviour for many cases like device resume time,
device suspend time, device enable time, retry logic, etc.
Thus, change msleep to usleep_range for precise wakeups.

Signed-off-by: Aniroop Mathur <a.mathur@samsung.com>
Acked-by: Igor Grinberg <grinberg@compulab.co.il>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/synaptics_i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index aa7c5da60800..cb2bf203f4ca 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -29,7 +29,7 @@
  * after soft reset, we should wait for 1 ms
  * before the device becomes operational
  */
-#define SOFT_RESET_DELAY_MS	3
+#define SOFT_RESET_DELAY_US	3000
 /* and after hard reset, we should wait for max 500ms */
 #define HARD_RESET_DELAY_MS	500
 
@@ -311,7 +311,7 @@ static int synaptics_i2c_reset_config(struct i2c_client *client)
 	if (ret) {
 		dev_err(&client->dev, "Unable to reset device\n");
 	} else {
-		msleep(SOFT_RESET_DELAY_MS);
+		usleep_range(SOFT_RESET_DELAY_US, SOFT_RESET_DELAY_US + 100);
 		ret = synaptics_i2c_config(client);
 		if (ret)
 			dev_err(&client->dev, "Unable to config device\n");

From 4a8b3a682be9addff7dbd16371fa8c34103b5c31 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 16 Dec 2016 10:55:49 -0600
Subject: [PATCH 008/258] ASoC: Intel: bytcr_rt5640: fallback mechanism if MCLK
 is not enabled

Commit df1a2776a795 ("ASoC: Intel: bytcr_rt5640: add MCLK support")
was merged but the corresponding clock framework patches have not,
after being bumped from audio to clock to x86 domains. The missing
clock-related patches result in a regression starting with 4.9 with
the audio card not being created.

Rather than reverting this commit and all following updates already
queued up for 4.10, handle run-time dependency on MCLK and fall back
to the previous bit-clock mode. This provides the same functionality
as in 4.8 for Baytrail devices. On Baytrail-CR most devices remain
silent with this fallback but additional patches are needed anyway.
As suggested by Mark Brown, the fallback is only allowed with -ENOENT,
all other run-time errors, including -EPROBE_DEFER, will stop the probe
with no sound card registered.

This patch should be applied to -stable as well as ASoC 4.10 fixes

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 507a86a5eafe..e33e4777a65c 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -825,10 +825,20 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
 	if ((byt_rt5640_quirk & BYT_RT5640_MCLK_EN) && (is_valleyview())) {
 		priv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
 		if (IS_ERR(priv->mclk)) {
+			ret_val = PTR_ERR(priv->mclk);
+
 			dev_err(&pdev->dev,
-				"Failed to get MCLK from pmc_plt_clk_3: %ld\n",
-				PTR_ERR(priv->mclk));
-			return PTR_ERR(priv->mclk);
+				"Failed to get MCLK from pmc_plt_clk_3: %d\n",
+				ret_val);
+
+			/*
+			 * Fall back to bit clock usage for -ENOENT (clock not
+			 * available likely due to missing dependencies), bail
+			 * for all other errors, including -EPROBE_DEFER
+			 */
+			if (ret_val != -ENOENT)
+				return ret_val;
+			byt_rt5640_quirk &= ~BYT_RT5640_MCLK_EN;
 		}
 	}
 

From 73ba39ab9307340dc98ec3622891314bbc09cc2e Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 4 Dec 2016 12:51:53 +0800
Subject: [PATCH 009/258] btrfs: return the actual error value from  from
 btrfs_uuid_tree_iterate

In function btrfs_uuid_tree_iterate(), errno is assigned to variable ret
on errors. However, it directly returns 0. It may be better to return
ret. This patch also removes the warning, because the caller already
prints a warning.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188731
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
[ edited subject ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/uuid-tree.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 161342b73ce5..726f928238d0 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -352,7 +352,5 @@ skip:
 
 out:
 	btrfs_free_path(path);
-	if (ret)
-		btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
-	return 0;
+	return ret;
 }

From 1cab2a84f470e15ecc8e5143bfe9398c6e888032 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Date: Tue, 20 Dec 2016 10:29:12 +0000
Subject: [PATCH 010/258] ASoC: wm_adsp: Don't overrun firmware file buffer
 when reading region data

Protect against corrupt firmware files by ensuring that the length we
get for the data in a region actually lies within the available firmware
file data buffer.

Signed-off-by: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/wm_adsp.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 593b7d1aed46..d72ccef9e238 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1551,7 +1551,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 	const struct wmfw_region *region;
 	const struct wm_adsp_region *mem;
 	const char *region_name;
-	char *file, *text;
+	char *file, *text = NULL;
 	struct wm_adsp_buf *buf;
 	unsigned int reg;
 	int regions = 0;
@@ -1700,10 +1700,21 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 			 regions, le32_to_cpu(region->len), offset,
 			 region_name);
 
+		if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
+		    firmware->size) {
+			adsp_err(dsp,
+				 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+				 file, regions, region_name,
+				 le32_to_cpu(region->len), firmware->size);
+			ret = -EINVAL;
+			goto out_fw;
+		}
+
 		if (text) {
 			memcpy(text, region->data, le32_to_cpu(region->len));
 			adsp_info(dsp, "%s: %s\n", file, text);
 			kfree(text);
+			text = NULL;
 		}
 
 		if (reg) {
@@ -1748,6 +1759,7 @@ out_fw:
 	regmap_async_complete(regmap);
 	wm_adsp_buf_free(&buf_list);
 	release_firmware(firmware);
+	kfree(text);
 out:
 	kfree(file);
 
@@ -2233,6 +2245,17 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 		}
 
 		if (reg) {
+			if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
+			    firmware->size) {
+				adsp_err(dsp,
+					 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+					 file, blocks, region_name,
+					 le32_to_cpu(blk->len),
+					 firmware->size);
+				ret = -EINVAL;
+				goto out_fw;
+			}
+
 			buf = wm_adsp_buf_alloc(blk->data,
 						le32_to_cpu(blk->len),
 						&buf_list);

From 7dbbf0fa1bf14c17900bb8057986b06db3822239 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 22 Nov 2016 16:17:50 -0800
Subject: [PATCH 011/258] scsi: scsi-mq: Wait for .queue_rq() if necessary

Ensure that if scsi-mq is enabled that scsi_internal_device_block()
waits until ongoing shost->hostt->queuecommand() calls have finished.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: James Bottomley <jejb@linux.vnet.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Doug Ledford <dledford@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c35b6de4ca64..9fd9a977c695 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2893,7 +2893,7 @@ scsi_internal_device_block(struct scsi_device *sdev)
 	 * request queue. 
 	 */
 	if (q->mq_ops) {
-		blk_mq_stop_hw_queues(q);
+		blk_mq_quiesce_queue(q);
 	} else {
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_stop_queue(q);

From 7961d53d22375bb9e8ae8063533b9059102ed39d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 20 Dec 2016 08:43:30 -0800
Subject: [PATCH 012/258] scsi: qedi: fix build, depends on UIO

Fix build of SCSI qedi driver. It uses uio interfaces so it should
depend on UIO.

ERROR: "uio_unregister_device" [drivers/scsi/qedi/qedi.ko] undefined!
ERROR: "uio_event_notify" [drivers/scsi/qedi/qedi.ko] undefined!
ERROR: "__uio_register_device" [drivers/scsi/qedi/qedi.ko] undefined!

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: QLogic-Storage-Upstream@cavium.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedi/Kconfig b/drivers/scsi/qedi/Kconfig
index 23ca8a274586..21331453db7b 100644
--- a/drivers/scsi/qedi/Kconfig
+++ b/drivers/scsi/qedi/Kconfig
@@ -1,6 +1,6 @@
 config QEDI
 	tristate "QLogic QEDI 25/40/100Gb iSCSI Initiator Driver Support"
-	depends on PCI && SCSI
+	depends on PCI && SCSI && UIO
 	depends on QED
 	select SCSI_ISCSI_ATTRS
 	select QED_LL2

From b6fc513da50c5dbc457a8ad6b58b046a6a68fd9d Mon Sep 17 00:00:00 2001
From: Pavel Rojtberg <rojtberg@gmail.com>
Date: Tue, 27 Dec 2016 11:44:51 -0800
Subject: [PATCH 013/258] Input: xpad - use correct product id for x360w
 controllers

currently the controllers get the same product id as the wireless
receiver. However the controllers actually have their own product id.

The patch makes the driver expose the same product id as the windows
driver.

This improves compatibility when running applications with WINE.

see https://github.com/paroj/xpad/issues/54

Signed-off-by: Pavel Rojtberg <rojtberg@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 6d9499658671..c7d5b2b643d1 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1377,6 +1377,12 @@ static int xpad_init_input(struct usb_xpad *xpad)
 	input_dev->name = xpad->name;
 	input_dev->phys = xpad->phys;
 	usb_to_input_id(xpad->udev, &input_dev->id);
+
+	if (xpad->xtype == XTYPE_XBOX360W) {
+		/* x360w controllers and the receiver have different ids */
+		input_dev->id.product = 0x02a1;
+	}
+
 	input_dev->dev.parent = &xpad->intf->dev;
 
 	input_set_drvdata(input_dev, xpad);

From d7ddad0acc4add42567f7879b116a0b9eea31860 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 27 Dec 2016 11:32:55 -0800
Subject: [PATCH 014/258] Input: synaptics-rmi4 - fix F03 build error when
 serio is module

Since F03 is a boolean, "depends" on symbols that can be modules
do not work quite right. We can enable F03 if SERIO is built-in or
if both RMI core and SERIO core are modules. If SERIO core is module,
but RMI is built-in, we'll get:

drivers/built-in.o: In function `rmi_f03_attention':
rmi_f03.c:(.text+0xf8ef8): undefined reference to `serio_interrupt'
rmi_f03.c:(.text+0xf8fbd): undefined reference to `serio_interrupt'
drivers/built-in.o: In function `rmi_f03_remove':
rmi_f03.c:(.text+0xf9082): undefined reference to `serio_unregister_port'
drivers/built-in.o: In function `rmi_f03_probe':
rmi_f03.c:(.text+0xf9260): undefined reference to `__serio_register_port'

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig
index 30cc627a4f45..8993983e3fe4 100644
--- a/drivers/input/rmi4/Kconfig
+++ b/drivers/input/rmi4/Kconfig
@@ -41,7 +41,8 @@ config RMI4_SMB
 
 config RMI4_F03
         bool "RMI4 Function 03 (PS2 Guest)"
-        depends on RMI4_CORE && SERIO
+	depends on RMI4_CORE
+	depends on SERIO=y || RMI4_CORE=SERIO
         help
           Say Y here if you want to add support for RMI4 function 03.
 

From 9396c9cb0d9534ca67bda8a34b2413a2ca1c67e9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 22 Dec 2016 15:03:50 +0900
Subject: [PATCH 015/258] perf sched timehist: Show total scheduling time

Show length of analyzed sample time and rate of idle task running.
This also takes care of time range given by --time option.

  $ perf sched timehist -sI | tail
  Samples do not have callchains.
  Idle stats:
      CPU  0 idle for    930.316  msec  ( 92.93%)
      CPU  1 idle for    963.614  msec  ( 96.25%)
      CPU  2 idle for    885.482  msec  ( 88.45%)
      CPU  3 idle for    938.635  msec  ( 93.76%)

      Total number of unique tasks: 118
  Total number of context switches: 2337
             Total run time (msec): 3718.048
      Total scheduling time (msec): 1001.131  (x 4)

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20161222060350.17655-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-sched.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d53e706a6f17..5b134b0d1ff3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -209,6 +209,7 @@ struct perf_sched {
 	u64		skipped_samples;
 	const char	*time_str;
 	struct perf_time_interval ptime;
+	struct perf_time_interval hist_time;
 };
 
 /* per thread run time data */
@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool,
 		timehist_print_sample(sched, sample, &al, thread, t);
 
 out:
+	if (sched->hist_time.start == 0 && t >= ptime->start)
+		sched->hist_time.start = t;
+	if (ptime->end == 0 || t <= ptime->end)
+		sched->hist_time.end = t;
+
 	if (tr) {
 		/* time of this sched_switch event becomes last time task seen */
 		tr->last_time = sample->time;
@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 	struct thread *t;
 	struct thread_runtime *r;
 	int i;
+	u64 hist_time = sched->hist_time.end - sched->hist_time.start;
 
 	memset(&totals, 0, sizeof(totals));
 
@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 			totals.sched_count += r->run_stats.n;
 			printf("    CPU %2d idle for ", i);
 			print_sched_time(r->total_run_time, 6);
-			printf(" msec\n");
+			printf(" msec  (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
 		} else
 			printf("    CPU %2d idle entire time window\n", i);
 	}
@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched,
 
 	printf("\n"
 	       "    Total number of unique tasks: %" PRIu64 "\n"
-	       "Total number of context switches: %" PRIu64 "\n"
-	       "           Total run time (msec): ",
+	       "Total number of context switches: %" PRIu64 "\n",
 	       totals.task_count, totals.sched_count);
 
+	printf("           Total run time (msec): ");
 	print_sched_time(totals.total_run_time, 2);
 	printf("\n");
+
+	printf("    Total scheduling time (msec): ");
+	print_sched_time(hist_time, 2);
+	printf(" (x %d)\n", sched->max_cpu);
 }
 
 typedef int (*sched_handler)(struct perf_tool *tool,

From ee12996c9d392ec61241ab6c74d257bc2122e0bc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 27 Dec 2016 21:49:17 -0300
Subject: [PATCH 016/258] samples/bpf sock_example: Avoid getting ethhdr from
 two includes

To avoid the following build failure on Alpine Linux 3.4, that has
clang-3.8 with the bpf target:

    HOSTCC  samples/bpf/sock_example.o
  In file included from /usr/include/net/ethernet.h:10:0,
                   from /git/linux/samples/bpf/sock_example.h:7,
                   from /git/linux/samples/bpf/sock_example.c:30:
  /usr/include/netinet/if_ether.h:96:8: error: redefinition of 'struct
  ethhdr'
   struct ethhdr {
          ^
  In file included from /git/linux/samples/bpf/sock_example.c:26:0:
  ./usr/include/linux/if_ether.h:144:8: note: originally defined here
   struct ethhdr {
          ^
  scripts/Makefile.host:124: recipe for target
  'samples/bpf/sock_example.o' failed
  make[2]: *** [samples/bpf/sock_example.o] Error 1
  /git/linux/Makefile:1658: recipe for target 'samples/bpf/' failed

So include net/if_ether.h for the needs of sock_example.h, using the
same include that sock_example.c uses.

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Joe Stringer <joe@ovn.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-m9avekl1b651qe1r1zd5tzz9@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 samples/bpf/sock_example.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h
index 09f7fe7e5fd7..d8014065d479 100644
--- a/samples/bpf/sock_example.h
+++ b/samples/bpf/sock_example.h
@@ -4,7 +4,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
-#include <net/ethernet.h>
+#include <linux/if_ether.h>
 #include <net/if.h>
 #include <linux/if_packet.h>
 #include <arpa/inet.h>

From abfb7b686a3e5be27bf81db62f9c5c895b76f5d1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 24 Dec 2016 13:59:23 +0000
Subject: [PATCH 017/258] efi/libstub/arm*: Pass latest memory map to the
 kernel

As reported by James Morse, the current libstub code involving the
annotated memory map only works somewhat correctly by accident, due
to the fact that a pool allocation happens to be reused immediately,
retaining its former contents on most implementations of the
UEFI boot services.

Instead of juggling memory maps, which makes the code more complex than
it needs to be, simply put placeholder values into the FDT for the memory
map parameters, and only write the actual values after ExitBootServices()
has been called.

Reported-by: James Morse <james.morse@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org>
Cc: Jeffrey Hugo <jhugo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-efi@vger.kernel.org
Fixes: ed9cc156c42f ("efi/libstub: Use efi_exit_boot_services() in FDT")
Link: http://lkml.kernel.org/r/1482587963-20183-2-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/efistub.h |  8 ---
 drivers/firmware/efi/libstub/fdt.c     | 87 +++++++++++++++++---------
 2 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index b98824e3800a..0e2a96b12cb3 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -39,14 +39,6 @@ efi_status_t efi_file_close(void *handle);
 
 unsigned long get_dram_base(efi_system_table_t *sys_table_arg);
 
-efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
-			unsigned long orig_fdt_size,
-			void *fdt, int new_fdt_size, char *cmdline_ptr,
-			u64 initrd_addr, u64 initrd_size,
-			efi_memory_desc_t *memory_map,
-			unsigned long map_size, unsigned long desc_size,
-			u32 desc_ver);
-
 efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 					    void *handle,
 					    unsigned long *new_fdt_addr,
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index a6a93116a8f0..921dfa047202 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -16,13 +16,10 @@
 
 #include "efistub.h"
 
-efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
-			unsigned long orig_fdt_size,
-			void *fdt, int new_fdt_size, char *cmdline_ptr,
-			u64 initrd_addr, u64 initrd_size,
-			efi_memory_desc_t *memory_map,
-			unsigned long map_size, unsigned long desc_size,
-			u32 desc_ver)
+static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
+			       unsigned long orig_fdt_size,
+			       void *fdt, int new_fdt_size, char *cmdline_ptr,
+			       u64 initrd_addr, u64 initrd_size)
 {
 	int node, num_rsv;
 	int status;
@@ -101,25 +98,23 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map);
+	fdt_val64 = U64_MAX; /* placeholder */
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-start",
 			     &fdt_val64,  sizeof(fdt_val64));
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val32 = cpu_to_fdt32(map_size);
+	fdt_val32 = U32_MAX; /* placeholder */
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-size",
 			     &fdt_val32,  sizeof(fdt_val32));
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val32 = cpu_to_fdt32(desc_size);
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size",
 			     &fdt_val32, sizeof(fdt_val32));
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val32 = cpu_to_fdt32(desc_ver);
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver",
 			     &fdt_val32, sizeof(fdt_val32));
 	if (status)
@@ -148,6 +143,43 @@ fdt_set_fail:
 	return EFI_LOAD_ERROR;
 }
 
+static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map)
+{
+	int node = fdt_path_offset(fdt, "/chosen");
+	u64 fdt_val64;
+	u32 fdt_val32;
+	int err;
+
+	if (node < 0)
+		return EFI_LOAD_ERROR;
+
+	fdt_val64 = cpu_to_fdt64((unsigned long)*map->map);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-start",
+				  &fdt_val64, sizeof(fdt_val64));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	fdt_val32 = cpu_to_fdt32(*map->map_size);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-size",
+				  &fdt_val32, sizeof(fdt_val32));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	fdt_val32 = cpu_to_fdt32(*map->desc_size);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-size",
+				  &fdt_val32, sizeof(fdt_val32));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	fdt_val32 = cpu_to_fdt32(*map->desc_ver);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-ver",
+				  &fdt_val32, sizeof(fdt_val32));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	return EFI_SUCCESS;
+}
+
 #ifndef EFI_FDT_ALIGN
 #define EFI_FDT_ALIGN EFI_PAGE_SIZE
 #endif
@@ -243,20 +275,10 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 			goto fail;
 		}
 
-		/*
-		 * Now that we have done our final memory allocation (and free)
-		 * we can get the memory map key  needed for
-		 * exit_boot_services().
-		 */
-		status = efi_get_memory_map(sys_table, &map);
-		if (status != EFI_SUCCESS)
-			goto fail_free_new_fdt;
-
 		status = update_fdt(sys_table,
 				    (void *)fdt_addr, fdt_size,
 				    (void *)*new_fdt_addr, new_fdt_size,
-				    cmdline_ptr, initrd_addr, initrd_size,
-				    memory_map, map_size, desc_size, desc_ver);
+				    cmdline_ptr, initrd_addr, initrd_size);
 
 		/* Succeeding the first time is the expected case. */
 		if (status == EFI_SUCCESS)
@@ -266,20 +288,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 			/*
 			 * We need to allocate more space for the new
 			 * device tree, so free existing buffer that is
-			 * too small.  Also free memory map, as we will need
-			 * to get new one that reflects the free/alloc we do
-			 * on the device tree buffer.
+			 * too small.
 			 */
 			efi_free(sys_table, new_fdt_size, *new_fdt_addr);
-			sys_table->boottime->free_pool(memory_map);
 			new_fdt_size += EFI_PAGE_SIZE;
 		} else {
 			pr_efi_err(sys_table, "Unable to construct new device tree.\n");
-			goto fail_free_mmap;
+			goto fail_free_new_fdt;
 		}
 	}
 
-	sys_table->boottime->free_pool(memory_map);
 	priv.runtime_map = runtime_map;
 	priv.runtime_entry_count = &runtime_entry_count;
 	status = efi_exit_boot_services(sys_table, handle, &map, &priv,
@@ -288,6 +306,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 	if (status == EFI_SUCCESS) {
 		efi_set_virtual_address_map_t *svam;
 
+		status = update_fdt_memmap((void *)*new_fdt_addr, &map);
+		if (status != EFI_SUCCESS) {
+			/*
+			 * The kernel won't get far without the memory map, but
+			 * may still be able to print something meaningful so
+			 * return success here.
+			 */
+			return EFI_SUCCESS;
+		}
+
 		/* Install the new virtual address map */
 		svam = sys_table->runtime->set_virtual_address_map;
 		status = svam(runtime_entry_count * desc_size, desc_size,
@@ -319,9 +347,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 
 	pr_efi_err(sys_table, "Exit boot services failed.\n");
 
-fail_free_mmap:
-	sys_table->boottime->free_pool(memory_map);
-
 fail_free_new_fdt:
 	efi_free(sys_table, new_fdt_size, *new_fdt_addr);
 

From b6f4c66704b875aba9b8c912532323e3cc89824c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 28 Dec 2016 10:47:13 -0300
Subject: [PATCH 018/258] samples/bpf trace_output_user: Remove duplicate
 sys/ioctl.h include

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Joe Stringer <joe@ovn.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-3awp0nv8tpnblatojmwjww7z@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 samples/bpf/trace_output_user.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index f4fa6af22def..ccca1e348017 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -9,7 +9,6 @@
 #include <string.h>
 #include <fcntl.h>
 #include <poll.h>
-#include <sys/ioctl.h>
 #include <linux/perf_event.h>
 #include <linux/bpf.h>
 #include <errno.h>

From 63447646ac657fde00bb658ce21a3431940ae0ad Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Thu, 15 Dec 2016 15:49:56 +0100
Subject: [PATCH 019/258] rpmsg: virtio_rpmsg_bus: fix channel creation

Since commit 4dffed5b3ac796b ("rpmsg: Name rpmsg devices based on
channel id"), it is no more possible for a firmware to register twice
a service (on different endpoints). rpmsg_register_device function
is failing when calling device_add for the second time as second
device has the same name as first one already register.
It is because name is based only on service name and so is not more
unique. Previously name was unique thanks to the use of rpmsg_dev_index.

This patch adds destination and source endpoint numbers device name to
create an unique identifier.

Fixes: 4dffed5b3ac7 ("rpmsg: Name rpmsg devices based on channel id")
Acked-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
[bjorn: flipped name and address in device name]
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/rpmsg/rpmsg_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index a79cb5a9e5f2..1cfb775e8e82 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -453,8 +453,8 @@ int rpmsg_register_device(struct rpmsg_device *rpdev)
 	struct device *dev = &rpdev->dev;
 	int ret;
 
-	dev_set_name(&rpdev->dev, "%s:%s",
-		     dev_name(dev->parent), rpdev->id.name);
+	dev_set_name(&rpdev->dev, "%s.%s.%d.%d", dev_name(dev->parent),
+		     rpdev->id.name, rpdev->src, rpdev->dst);
 
 	rpdev->dev.bus = &rpmsg_bus;
 	rpdev->dev.release = rpmsg_release_device;

From c81c0e0710f031cb09eb7cbf0e75e6754d1d8346 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Wed, 14 Dec 2016 16:11:00 +0100
Subject: [PATCH 020/258] remoteproc: fix vdev reference management

Commit 2b45cef5868a ("remoteproc: Further extend the vdev life cycle")
extends kref support for vdev management.
It introduces a regression when following sequence is executed:
rproc_boot --> rproc_shutdown --> rproc_boot
Second rproc_boot call crashes on register_virtio_device as device
is already existing.
Issue is previous vdev is never released when rproc is stop because
associated refcount is too high.

kref_get introduces is not needed as kref_init already initializes
krefcount to 1 because it considers associated variable as used.
This introduces a misalignment between kref_get and kref_put calls.

Fixes: 2b45cef5868a ("remoteproc: Further extend the vdev life cycle")
Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 9a507e77eced..feb24c43d4c7 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -396,9 +396,6 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 			goto unwind_vring_allocations;
 	}
 
-	/* track the rvdevs list reference */
-	kref_get(&rvdev->refcount);
-
 	list_add_tail(&rvdev->node, &rproc->rvdevs);
 
 	rproc_add_subdev(rproc, &rvdev->subdev,

From a0c10687ec9506b5e14fe3dd47832a77f2f2500c Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 30 Dec 2016 03:21:38 -0800
Subject: [PATCH 021/258] Revert "remoteproc: Merge table_ptr and cached_table
 pointers"

Following any fw_rsc_vdev entries in the resource table are two variable
length arrays, the first one reference vring resources and the second
one is the virtio config space.  The virtio config space is used by
virtio to communicate status and configuration changes and must as such
be shared with the remote.

The reverted commit incorrectly made any changes to the virtio config
space only affect the local copy, in an attempt to allowing memory
protection of the shared resource table.

This reverts commit cda8529346935fc86f476999ac4fbfe4e17abf11.
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 26 ++++++++++++++++----------
 include/linux/remoteproc.h           |  4 +++-
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index feb24c43d4c7..90b05c72186c 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -886,13 +886,15 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 	/*
 	 * Create a copy of the resource table. When a virtio device starts
 	 * and calls vring_new_virtqueue() the address of the allocated vring
-	 * will be stored in the table_ptr. Before the device is started,
-	 * table_ptr will be copied into device memory.
+	 * will be stored in the cached_table. Before the device is started,
+	 * cached_table will be copied into device memory.
 	 */
-	rproc->table_ptr = kmemdup(table, tablesz, GFP_KERNEL);
-	if (!rproc->table_ptr)
+	rproc->cached_table = kmemdup(table, tablesz, GFP_KERNEL);
+	if (!rproc->cached_table)
 		goto clean_up;
 
+	rproc->table_ptr = rproc->cached_table;
+
 	/* reset max_notifyid */
 	rproc->max_notifyid = -1;
 
@@ -911,16 +913,18 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 	}
 
 	/*
-	 * The starting device has been given the rproc->table_ptr as the
+	 * The starting device has been given the rproc->cached_table as the
 	 * resource table. The address of the vring along with the other
-	 * allocated resources (carveouts etc) is stored in table_ptr.
+	 * allocated resources (carveouts etc) is stored in cached_table.
 	 * In order to pass this information to the remote device we must copy
 	 * this information to device memory. We also update the table_ptr so
 	 * that any subsequent changes will be applied to the loaded version.
 	 */
 	loaded_table = rproc_find_loaded_rsc_table(rproc, fw);
-	if (loaded_table)
-		memcpy(loaded_table, rproc->table_ptr, tablesz);
+	if (loaded_table) {
+		memcpy(loaded_table, rproc->cached_table, tablesz);
+		rproc->table_ptr = loaded_table;
+	}
 
 	/* power up the remote processor */
 	ret = rproc->ops->start(rproc);
@@ -948,7 +952,8 @@ stop_rproc:
 clean_up_resources:
 	rproc_resource_cleanup(rproc);
 clean_up:
-	kfree(rproc->table_ptr);
+	kfree(rproc->cached_table);
+	rproc->cached_table = NULL;
 	rproc->table_ptr = NULL;
 
 	rproc_disable_iommu(rproc);
@@ -1182,7 +1187,8 @@ void rproc_shutdown(struct rproc *rproc)
 	rproc_disable_iommu(rproc);
 
 	/* Free the copy of the resource table */
-	kfree(rproc->table_ptr);
+	kfree(rproc->cached_table);
+	rproc->cached_table = NULL;
 	rproc->table_ptr = NULL;
 
 	/* if in crash state, unlock crash handler */
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index e2f3a3281d8f..8265d351c9f0 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -408,7 +408,8 @@ enum rproc_crash_type {
  * @crash_comp: completion used to sync crash handler and the rproc reload
  * @recovery_disabled: flag that state if recovery was disabled
  * @max_notifyid: largest allocated notify id.
- * @table_ptr: our copy of the resource table
+ * @table_ptr: pointer to the resource table in effect
+ * @cached_table: copy of the resource table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
  */
 struct rproc {
@@ -440,6 +441,7 @@ struct rproc {
 	bool recovery_disabled;
 	int max_notifyid;
 	struct resource_table *table_ptr;
+	struct resource_table *cached_table;
 	bool has_iommu;
 	bool auto_boot;
 };

From d2e3a1358c37cd82eef92b5e908b4f0472194481 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Thu, 29 Dec 2016 14:11:21 +0100
Subject: [PATCH 022/258] ASoC: Fix binding and probing of auxiliary components

Currently binding of auxiliary devices doesn't work as in
soc_bind_aux_dev() function a bound component is not being added
to any list and in soc_probe_aux_devices() we are trying to walk
the component_dev_list list to probe auxiliary components but
at that time this list doesn't contain any auxiliary components
since they are being added to the card only in soc_probe_component().

This patch adds a list to the card where are stored bound but not
probed auxiliary devices, so that all aux devices can be probed.

Fixes: 1a653aa44725 "ASoC: core: replace aux_comp_list to component_dev_list"
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  3 +++
 sound/soc/soc-core.c | 10 +++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 2b502f6cc6d0..b86168a21d56 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -813,6 +813,7 @@ struct snd_soc_component {
 	unsigned int suspended:1; /* is in suspend PM state */
 
 	struct list_head list;
+	struct list_head card_aux_list; /* for auxiliary bound components */
 	struct list_head card_list;
 
 	struct snd_soc_dai_driver *dai_drv;
@@ -1152,6 +1153,7 @@ struct snd_soc_card {
 	 */
 	struct snd_soc_aux_dev *aux_dev;
 	int num_aux_devs;
+	struct list_head aux_comp_list;
 
 	const struct snd_kcontrol_new *controls;
 	int num_controls;
@@ -1547,6 +1549,7 @@ static inline void snd_soc_initialize_card_lists(struct snd_soc_card *card)
 	INIT_LIST_HEAD(&card->widgets);
 	INIT_LIST_HEAD(&card->paths);
 	INIT_LIST_HEAD(&card->dapm_list);
+	INIT_LIST_HEAD(&card->aux_comp_list);
 	INIT_LIST_HEAD(&card->component_dev_list);
 }
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index f1901bb1466e..baa1afa41e3d 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1748,6 +1748,7 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num)
 
 	component->init = aux_dev->init;
 	component->auxiliary = 1;
+	list_add(&component->card_aux_list, &card->aux_comp_list);
 
 	return 0;
 
@@ -1758,16 +1759,14 @@ err_defer:
 
 static int soc_probe_aux_devices(struct snd_soc_card *card)
 {
-	struct snd_soc_component *comp;
+	struct snd_soc_component *comp, *tmp;
 	int order;
 	int ret;
 
 	for (order = SND_SOC_COMP_ORDER_FIRST; order <= SND_SOC_COMP_ORDER_LAST;
 		order++) {
-		list_for_each_entry(comp, &card->component_dev_list, card_list) {
-			if (!comp->auxiliary)
-				continue;
-
+		list_for_each_entry_safe(comp, tmp, &card->aux_comp_list,
+					 card_aux_list) {
 			if (comp->driver->probe_order == order) {
 				ret = soc_probe_component(card,	comp);
 				if (ret < 0) {
@@ -1776,6 +1775,7 @@ static int soc_probe_aux_devices(struct snd_soc_card *card)
 						comp->name, ret);
 					return ret;
 				}
+				list_del(&comp->card_aux_list);
 			}
 		}
 	}

From 63c3194b82530bd71fd49db84eb7ab656b8d404a Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 23 Dec 2016 11:21:10 +0200
Subject: [PATCH 023/258] ASoC: tlv320aic3x: Mark the RESET register as
 volatile

The RESET register only have one self clearing bit and it should not be
cached. If it is cached, when we sync the registers back to the chip we
will initiate a software reset as well, which is not desirable.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/tlv320aic3x.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index 8877b74b0510..bb94d50052d7 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -126,6 +126,16 @@ static const struct reg_default aic3x_reg[] = {
 	{ 108, 0x00 }, { 109, 0x00 },
 };
 
+static bool aic3x_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AIC3X_RESET:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static const struct regmap_config aic3x_regmap = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -133,6 +143,9 @@ static const struct regmap_config aic3x_regmap = {
 	.max_register = DAC_ICC_ADJ,
 	.reg_defaults = aic3x_reg,
 	.num_reg_defaults = ARRAY_SIZE(aic3x_reg),
+
+	.volatile_reg = aic3x_volatile_reg,
+
 	.cache_type = REGCACHE_RBTREE,
 };
 

From 91ce54978ccece323aa6df930249ff84a7d233c7 Mon Sep 17 00:00:00 2001
From: G Kranthi <gudishax.kranthikumar@intel.com>
Date: Tue, 20 Dec 2016 12:46:45 +0530
Subject: [PATCH 024/258] ASoC: Intel: Skylake: Fix to fail safely if module
 not available in path

If a module is not available in a pipeline, fail safely rather than
causing oops.

Signed-off-by: G Kranthi <gudishax.kranthikumar@intel.com>
Signed-off-by: Subhransu S. Prusty <subhransu.s.prusty@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/skylake/skl-pcm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index 84b5101e6ca6..6c6b63a6b338 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -180,6 +180,9 @@ static int skl_pcm_open(struct snd_pcm_substream *substream,
 	snd_pcm_set_sync(substream);
 
 	mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream);
+	if (!mconfig)
+		return -EINVAL;
+
 	skl_tplg_d0i3_get(skl, mconfig->d0i3_caps);
 
 	return 0;

From a33b56a6a824fa5cd89c74f85cbeb9af1dcef87e Mon Sep 17 00:00:00 2001
From: John Hsu <KCHSU0@nuvoton.com>
Date: Tue, 20 Dec 2016 16:47:06 +0800
Subject: [PATCH 025/258] ASoC: nau8825: correct the function name of register

Change to correct name of the register function.

Signed-off-by: John Hsu <KCHSU0@nuvoton.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/nau8825.c | 6 +++---
 sound/soc/codecs/nau8825.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index efe3a44658d5..abf77dd422f4 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -561,9 +561,9 @@ static void nau8825_xtalk_prepare(struct nau8825 *nau8825)
 	nau8825_xtalk_backup(nau8825);
 	/* Config IIS as master to output signal by codec */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2,
-		NAU8825_I2S_MS_MASK | NAU8825_I2S_DRV_MASK |
+		NAU8825_I2S_MS_MASK | NAU8825_I2S_LRC_DIV_MASK |
 		NAU8825_I2S_BLK_DIV_MASK, NAU8825_I2S_MS_MASTER |
-		(0x2 << NAU8825_I2S_DRV_SFT) | 0x1);
+		(0x2 << NAU8825_I2S_LRC_DIV_SFT) | 0x1);
 	/* Ramp up headphone volume to 0dB to get better performance and
 	 * avoid pop noise in headphone.
 	 */
@@ -657,7 +657,7 @@ static void nau8825_xtalk_clean(struct nau8825 *nau8825)
 		NAU8825_IRQ_RMS_EN, NAU8825_IRQ_RMS_EN);
 	/* Recover default value for IIS */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2,
-		NAU8825_I2S_MS_MASK | NAU8825_I2S_DRV_MASK |
+		NAU8825_I2S_MS_MASK | NAU8825_I2S_LRC_DIV_MASK |
 		NAU8825_I2S_BLK_DIV_MASK, NAU8825_I2S_MS_SLAVE);
 	/* Restore value of specific register for cross talk */
 	nau8825_xtalk_restore(nau8825);
diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h
index 5d1704e73241..b6b21b312854 100644
--- a/sound/soc/codecs/nau8825.h
+++ b/sound/soc/codecs/nau8825.h
@@ -247,8 +247,8 @@
 
 /* I2S_PCM_CTRL2 (0x1d) */
 #define NAU8825_I2S_TRISTATE	(1 << 15) /* 0 - normal mode, 1 - Hi-Z output */
-#define NAU8825_I2S_DRV_SFT	12
-#define NAU8825_I2S_DRV_MASK	(0x3 << NAU8825_I2S_DRV_SFT)
+#define NAU8825_I2S_LRC_DIV_SFT	12
+#define NAU8825_I2S_LRC_DIV_MASK	(0x3 << NAU8825_I2S_LRC_DIV_SFT)
 #define NAU8825_I2S_MS_SFT	3
 #define NAU8825_I2S_MS_MASK	(1 << NAU8825_I2S_MS_SFT)
 #define NAU8825_I2S_MS_MASTER	(1 << NAU8825_I2S_MS_SFT)

From a1792cda51300e15b03549cccf0b09f3be82e697 Mon Sep 17 00:00:00 2001
From: John Hsu <KCHSU0@nuvoton.com>
Date: Tue, 20 Dec 2016 12:03:09 +0800
Subject: [PATCH 026/258] ASoC: nau8825: fix invalid configuration in
 Pre-Scalar of FLL

The clk_ref_div is not configured in the correct position of the
register. The patch fixes that clk_ref_div, Pre-Scalar, is assigned
the wrong value.

Signed-off-by: John Hsu <KCHSU0@nuvoton.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/nau8825.c | 3 ++-
 sound/soc/codecs/nau8825.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index abf77dd422f4..4576f987a4a5 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -2006,7 +2006,8 @@ static void nau8825_fll_apply(struct nau8825 *nau8825,
 			NAU8825_FLL_INTEGER_MASK, fll_param->fll_int);
 	/* FLL pre-scaler */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL4,
-			NAU8825_FLL_REF_DIV_MASK, fll_param->clk_ref_div);
+			NAU8825_FLL_REF_DIV_MASK,
+			fll_param->clk_ref_div << NAU8825_FLL_REF_DIV_SFT);
 	/* select divided VCO input */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL5,
 		NAU8825_FLL_CLK_SW_MASK, NAU8825_FLL_CLK_SW_REF);
diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h
index b6b21b312854..514fd13c2f46 100644
--- a/sound/soc/codecs/nau8825.h
+++ b/sound/soc/codecs/nau8825.h
@@ -137,7 +137,8 @@
 #define NAU8825_FLL_CLK_SRC_FS			(0x3 << NAU8825_FLL_CLK_SRC_SFT)
 
 /* FLL4 (0x07) */
-#define NAU8825_FLL_REF_DIV_MASK		(0x3 << 10)
+#define NAU8825_FLL_REF_DIV_SFT	10
+#define NAU8825_FLL_REF_DIV_MASK	(0x3 << NAU8825_FLL_REF_DIV_SFT)
 
 /* FLL5 (0x08) */
 #define NAU8825_FLL_PDB_DAC_EN		(0x1 << 15)

From 1594c18fd297a8edcc72bc4b161f3f52603ebb92 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 13 Dec 2016 11:15:21 -0700
Subject: [PATCH 027/258] dmaengine: ioatdma: Add Skylake PCI Dev ID

Adding Skylake Xeon PCI device ids for ioatdma and related bits.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ioat/hw.h   | 2 ++
 drivers/dma/ioat/init.c | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 8e67895bcca3..abcc51b343ce 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -64,6 +64,8 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_BDX8	0x6f2e
 #define PCI_DEVICE_ID_INTEL_IOAT_BDX9	0x6f2f
 
+#define PCI_DEVICE_ID_INTEL_IOAT_SKX	0x2021
+
 #define IOAT_VER_1_2            0x12    /* Version 1.2 */
 #define IOAT_VER_2_0            0x20    /* Version 2.0 */
 #define IOAT_VER_3_0            0x30    /* Version 3.0 */
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 90eddd9f07e4..51b2b643ba71 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -106,6 +106,8 @@ static struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) },
 
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) },
+
 	/* I/OAT v3.3 platforms */
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) },
@@ -243,10 +245,15 @@ static bool is_bdx_ioat(struct pci_dev *pdev)
 	}
 }
 
+static inline bool is_skx_ioat(struct pci_dev *pdev)
+{
+	return (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX) ? true : false;
+}
+
 static bool is_xeon_cb32(struct pci_dev *pdev)
 {
 	return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
-		is_hsw_ioat(pdev) || is_bdx_ioat(pdev);
+		is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev);
 }
 
 bool is_bwd_ioat(struct pci_dev *pdev)

From 34a31f0af84158955a9747fb5c6712da5bbb5331 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 13 Dec 2016 11:15:27 -0700
Subject: [PATCH 028/258] dmaengine: ioatdma: workaround SKX ioatdma version

The Skylake ioatdma is technically CBDMA 3.2+ and contains the same hardware
bits with some additional 3.3 features, but it's not really 3.3 where the
driver is concerned.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ioat/init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 51b2b643ba71..ace5cb2cb12f 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -1364,6 +1364,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
 	if (device->version >= IOAT_VER_3_0) {
+		if (is_skx_ioat(pdev))
+			device->version = IOAT_VER_3_2;
 		err = ioat3_dma_probe(device, ioat_dca_enabled);
 
 		if (device->version >= IOAT_VER_3_3)

From 1032471b3ec823bce7687034ac5af78a8ac99a9c Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 15 Dec 2016 11:43:30 +0100
Subject: [PATCH 029/258] dmaengine: dw: fix typo in Kconfig

platfroms -> platforms

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: fed42c198b45 ("dma: dw: add PCI part of the driver")
Cc: Viresh Kumar <vireshk@kernel.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dw/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig
index e00c9b022964..5a37b9fcf40d 100644
--- a/drivers/dma/dw/Kconfig
+++ b/drivers/dma/dw/Kconfig
@@ -24,5 +24,5 @@ config DW_DMAC_PCI
 	select DW_DMAC_CORE
 	help
 	  Support the Synopsys DesignWare AHB DMA controller on the
-	  platfroms that enumerate it as a PCI device. For example,
+	  platforms that enumerate it as a PCI device. For example,
 	  Intel Medfield has integrated this GPDMA controller.

From 7e96304d99477de1f70db42035071e56439da817 Mon Sep 17 00:00:00 2001
From: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Date: Tue, 13 Dec 2016 14:40:43 +0100
Subject: [PATCH 030/258] dmaengine: stm32-dma: Set correct args number for DMA
 request from DT

This patch sets the right number of arguments to be used for DMA clients
which request channels from DT.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/stm32-dma.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 3688d0873a3e..a884b857cba4 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -972,21 +972,18 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 	struct stm32_dma_chan *chan;
 	struct dma_chan *c;
 
-	if (dma_spec->args_count < 3)
+	if (dma_spec->args_count < 4)
 		return NULL;
 
 	cfg.channel_id = dma_spec->args[0];
 	cfg.request_line = dma_spec->args[1];
 	cfg.stream_config = dma_spec->args[2];
-	cfg.threshold = 0;
+	cfg.threshold = dma_spec->args[3];
 
 	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >=
 				STM32_DMA_MAX_REQUEST_ID))
 		return NULL;
 
-	if (dma_spec->args_count > 3)
-		cfg.threshold = dma_spec->args[3];
-
 	chan = &dmadev->chan[cfg.channel_id];
 
 	c = dma_get_slave_channel(&chan->vchan.chan);

From 57b5a32135c813f2ab669039fb4ec16b30cb3305 Mon Sep 17 00:00:00 2001
From: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Date: Tue, 13 Dec 2016 14:40:46 +0100
Subject: [PATCH 031/258] dmaengine: stm32-dma: Fix null pointer dereference in
 stm32_dma_tx_status

chan->desc is always set to NULL when a DMA transfer is complete.
As a DMA transfer could be complete during the call of stm32_dma_tx_status,
we need to be sure that chan->desc is not NULL before using this variable
to avoid a null pointer deference issue.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/stm32-dma.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index a884b857cba4..3056ce7f8c69 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -880,7 +880,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 	struct virt_dma_desc *vdesc;
 	enum dma_status status;
 	unsigned long flags;
-	u32 residue;
+	u32 residue = 0;
 
 	status = dma_cookie_status(c, cookie, state);
 	if ((status == DMA_COMPLETE) || (!state))
@@ -888,16 +888,12 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
 	vdesc = vchan_find_desc(&chan->vchan, cookie);
-	if (cookie == chan->desc->vdesc.tx.cookie) {
+	if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
 		residue = stm32_dma_desc_residue(chan, chan->desc,
 						 chan->next_sg);
-	} else if (vdesc) {
+	else if (vdesc)
 		residue = stm32_dma_desc_residue(chan,
 						 to_stm32_dma_desc(vdesc), 0);
-	} else {
-		residue = 0;
-	}
-
 	dma_set_residue(state, residue);
 
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);

From 75bdc7f31a3a6e9a12e218b31a44a1f54a91554c Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 19 Dec 2016 06:33:51 +0100
Subject: [PATCH 032/258] dmaengine: ti-dma-crossbar: Add some 'of_node_put()'
 in error path.

Add some missing 'of_node_put()' in early exit error path.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ti-dma-crossbar.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
index 3f24aeb48c0e..2403475a37cf 100644
--- a/drivers/dma/ti-dma-crossbar.c
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -149,6 +149,7 @@ static int ti_am335x_xbar_probe(struct platform_device *pdev)
 	match = of_match_node(ti_am335x_master_match, dma_node);
 	if (!match) {
 		dev_err(&pdev->dev, "DMA master is not supported\n");
+		of_node_put(dma_node);
 		return -EINVAL;
 	}
 
@@ -339,6 +340,7 @@ static int ti_dra7_xbar_probe(struct platform_device *pdev)
 	match = of_match_node(ti_dra7_master_match, dma_node);
 	if (!match) {
 		dev_err(&pdev->dev, "DMA master is not supported\n");
+		of_node_put(dma_node);
 		return -EINVAL;
 	}
 

From 73529c872a189c747bdb528ce9b85b67b0e28dec Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 5 Dec 2016 22:14:36 +0100
Subject: [PATCH 033/258] mtd: nand: xway: disable module support

The xway_nand driver accesses the ltq_ebu_membase symbol which is not
exported. This also should not get exported and we should handle the
EBU interface in a better way later. This quick fix just deactivated
support for building as module.

Fixes: 99f2b107924c ("mtd: lantiq: Add NAND support on Lantiq XWAY SoC.")
Cc: <stable@vger.kernel.org>
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 353a9ddf6b97..2f35ab5426f8 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -540,7 +540,7 @@ config MTD_NAND_FSMC
 	  Flexible Static Memory Controller (FSMC)
 
 config MTD_NAND_XWAY
-	tristate "Support for NAND on Lantiq XWAY SoC"
+	bool "Support for NAND on Lantiq XWAY SoC"
 	depends on LANTIQ && SOC_TYPE_XWAY
 	help
 	  Enables support for NAND Flash chips on Lantiq XWAY SoCs. NAND is attached

From a2724663494f7313f53da10d8c0a729c5e3c4dea Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 5 Dec 2016 22:14:37 +0100
Subject: [PATCH 034/258] mtd: nand: xway: fix build because of module
 functions

Remove the usage of modules functions to make this driver compile
again. Otherwise an include of linux/modules.h would be needed.

Fixes: 024366750c2e ("mtd: nand: xway: convert to normal platform driver")
Cc: <stable@vger.kernel.org>
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/xway_nand.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/xway_nand.c b/drivers/mtd/nand/xway_nand.c
index 1f2948c0c458..895101a5e686 100644
--- a/drivers/mtd/nand/xway_nand.c
+++ b/drivers/mtd/nand/xway_nand.c
@@ -232,7 +232,6 @@ static const struct of_device_id xway_nand_match[] = {
 	{ .compatible = "lantiq,nand-xway" },
 	{},
 };
-MODULE_DEVICE_TABLE(of, xway_nand_match);
 
 static struct platform_driver xway_nand_driver = {
 	.probe	= xway_nand_probe,
@@ -243,6 +242,4 @@ static struct platform_driver xway_nand_driver = {
 	},
 };
 
-module_platform_driver(xway_nand_driver);
-
-MODULE_LICENSE("GPL");
+builtin_platform_driver(xway_nand_driver);

From f0fcdc506b76e924c60fa607bba5872ca4745476 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 1 Jan 2017 18:58:27 -0800
Subject: [PATCH 035/258] mtd: nand: oxnas_nand: fix build errors on arch/um,
 require HAS_IOMEM

Fix build errors on arch/um, which does not support HAS_IOMEM,
while the oxnas_nand.c driver uses interfaces that are
supplied by HAS_IOMEM.

(loadable module build:)
ERROR: "devm_ioremap_resource" [drivers/mtd/nand/oxnas_nand.ko] undefined!
or (built-in build:)
drivers/built-in.o: In function `oxnas_nand_probe':
drivers/mtd/nand/oxnas_nand.c:102: undefined reference to `devm_ioremap_resource'

Fixes: 668592492409 ("mtd: nand: Add OX820 NAND Support")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 2f35ab5426f8..9ce5dcb4abd0 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -426,6 +426,7 @@ config MTD_NAND_ORION
 
 config MTD_NAND_OXNAS
 	tristate "NAND Flash support for Oxford Semiconductor SoC"
+	depends on HAS_IOMEM
 	help
 	  This enables the NAND flash controller on Oxford Semiconductor SoCs.
 

From 1f2ed153b916c95a49a1ca9d7107738664224b7f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 3 Jan 2017 00:20:49 +0900
Subject: [PATCH 036/258] perf probe: Fix to get correct modname from elf
 header

Since 'perf probe' supports cross-arch probes, it is possible to analyze
different arch kernel image which has different bits-per-long.

In that case, it fails to get the module name because it uses the
MOD_NAME_OFFSET macro based on the host machine bits-per-long, instead
of the target arch bits-per-long.

This fixes above issue by changing modname-offset based on the target
archs bit width. This is ok because linux kernel uses LP64 model on
64bit arch.

E.g. without this (on x86_64, and target module is arm32):

  $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup
  p:probe/configfs_lookup :configfs_lookup+0
                          ^-Here is an empty module name.

With this fix, you can see correct module name:

  $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup
  p:probe/configfs_lookup configfs:configfs_lookup+0

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/148337043836.6752.383495516397005695.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d281ae2b54e8..8f810961ec78 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -267,21 +267,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
 	return true;
 }
 
-/*
- * NOTE:
- * '.gnu.linkonce.this_module' section of kernel module elf directly
- * maps to 'struct module' from linux/module.h. This section contains
- * actual module name which will be used by kernel after loading it.
- * But, we cannot use 'struct module' here since linux/module.h is not
- * exposed to user-space. Offset of 'name' has remained same from long
- * time, so hardcoding it here.
- */
-#ifdef __LP64__
-#define MOD_NAME_OFFSET 24
-#else
-#define MOD_NAME_OFFSET 12
-#endif
-
 /*
  * @module can be module name of module file path. In case of path,
  * inspect elf and find out what is actual module name.
@@ -296,6 +281,7 @@ static char *find_module_name(const char *module)
 	Elf_Data *data;
 	Elf_Scn *sec;
 	char *mod_name = NULL;
+	int name_offset;
 
 	fd = open(module, O_RDONLY);
 	if (fd < 0)
@@ -317,7 +303,21 @@ static char *find_module_name(const char *module)
 	if (!data || !data->d_buf)
 		goto ret_err;
 
-	mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
+	/*
+	 * NOTE:
+	 * '.gnu.linkonce.this_module' section of kernel module elf directly
+	 * maps to 'struct module' from linux/module.h. This section contains
+	 * actual module name which will be used by kernel after loading it.
+	 * But, we cannot use 'struct module' here since linux/module.h is not
+	 * exposed to user-space. Offset of 'name' has remained same from long
+	 * time, so hardcoding it here.
+	 */
+	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+		name_offset = 12;
+	else	/* expect ELFCLASS64 by default */
+		name_offset = 24;
+
+	mod_name = strdup((char *)data->d_buf + name_offset);
 
 ret_err:
 	elf_end(elf);

From 836c3ce2566fb8c1754f8d7c9534cad9bc8a6879 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 2 Jan 2017 12:07:37 +0200
Subject: [PATCH 037/258] dmaengine: omap-dma: Fix dynamic lch_map allocation

The original patch did not done what it was supposed to be doing and even
worst it broke legacy boot (OMAP1).

The lch_map size should be the number of available logical channels in sDMA
and the od->dma_requests should store the number of available DMA request
lines usable in sDMA.

In legacy mode we do not have a way to get the DMA request count, in that
case we use OMAP_SDMA_REQUESTS (127), despite the fact that OMAP1510 have
only 31 DMA request line.

Fixes: 2d1a9a946fae ("dmaengine: omap-dma: Dynamically allocate memory for lch_map")
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: stable@vger.kernel.org   # v4.9
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/omap-dma.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index ac68666cd3f4..4ad101a47e0a 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -1452,6 +1452,7 @@ static int omap_dma_probe(struct platform_device *pdev)
 	struct omap_dmadev *od;
 	struct resource *res;
 	int rc, i, irq;
+	u32 lch_count;
 
 	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
 	if (!od)
@@ -1494,20 +1495,31 @@ static int omap_dma_probe(struct platform_device *pdev)
 	spin_lock_init(&od->lock);
 	spin_lock_init(&od->irq_lock);
 
-	if (!pdev->dev.of_node) {
-		od->dma_requests = od->plat->dma_attr->lch_count;
-		if (unlikely(!od->dma_requests))
-			od->dma_requests = OMAP_SDMA_REQUESTS;
-	} else if (of_property_read_u32(pdev->dev.of_node, "dma-requests",
-					&od->dma_requests)) {
+	/* Number of DMA requests */
+	od->dma_requests = OMAP_SDMA_REQUESTS;
+	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+						      "dma-requests",
+						      &od->dma_requests)) {
 		dev_info(&pdev->dev,
 			 "Missing dma-requests property, using %u.\n",
 			 OMAP_SDMA_REQUESTS);
-		od->dma_requests = OMAP_SDMA_REQUESTS;
 	}
 
-	od->lch_map = devm_kcalloc(&pdev->dev, od->dma_requests,
-				   sizeof(*od->lch_map), GFP_KERNEL);
+	/* Number of available logical channels */
+	if (!pdev->dev.of_node) {
+		lch_count = od->plat->dma_attr->lch_count;
+		if (unlikely(!lch_count))
+			lch_count = OMAP_SDMA_CHANNELS;
+	} else if (of_property_read_u32(pdev->dev.of_node, "dma-channels",
+					&lch_count)) {
+		dev_info(&pdev->dev,
+			 "Missing dma-channels property, using %u.\n",
+			 OMAP_SDMA_CHANNELS);
+		lch_count = OMAP_SDMA_CHANNELS;
+	}
+
+	od->lch_map = devm_kcalloc(&pdev->dev, lch_count, sizeof(*od->lch_map),
+				   GFP_KERNEL);
 	if (!od->lch_map)
 		return -ENOMEM;
 

From f53243b563e8966fb5a5cd8f27d48b832d3b1c43 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Mon, 2 Jan 2017 17:42:08 +0100
Subject: [PATCH 038/258] MAINTAINERS: dmaengine: Update + Hand over the
 at_hdmac driver to Ludovic

Hand over the Microchip / Atmel DMA driver handled by at_hdmac driver
to Ludovic who is responsible for the newer at_xdmac driver as well.
Also update the entry name and position to follow company changes.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 MAINTAINERS | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cfff2c9e3d94..c10150853273 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2194,14 +2194,6 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
 F:	sound/soc/atmel
 
-ATMEL DMA DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Supported
-F:	drivers/dma/at_hdmac.c
-F:	drivers/dma/at_hdmac_regs.h
-F:	include/linux/platform_data/dma-atmel.h
-
 ATMEL XDMA DRIVER
 M:	Ludovic Desroches <ludovic.desroches@atmel.com>
 L:	linux-arm-kernel@lists.infradead.org
@@ -8174,6 +8166,15 @@ S:	Maintained
 F:	drivers/tty/serial/atmel_serial.c
 F:	include/linux/atmel_serial.h
 
+MICROCHIP / ATMEL DMA DRIVER
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	dmaengine@vger.kernel.org
+S:	Supported
+F:	drivers/dma/at_hdmac.c
+F:	drivers/dma/at_hdmac_regs.h
+F:	include/linux/platform_data/dma-atmel.h
+
 MICROCHIP / ATMEL ISC DRIVER
 M:	Songjun Wu <songjun.wu@microchip.com>
 L:	linux-media@vger.kernel.org

From 5c9e6c2b2ba3ec3a442e2fb5b4286498f8b4dcb7 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 16 Dec 2016 11:39:11 +0100
Subject: [PATCH 039/258] dmaengine: pl330: Fix runtime PM support for
 terminated transfers

PL330 DMA engine driver is leaking a runtime reference after any terminated
DMA transactions. This patch fixes this issue by tracking runtime PM state
of the device and making additional call to pm_runtime_put() in terminate_all
callback if needed.

Fixes: ae43b3289186 ("ARM: 8202/1: dmaengine: pl330: Add runtime Power Management support v12")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/pl330.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 87fd01539fcb..740bbb942594 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -448,6 +448,9 @@ struct dma_pl330_chan {
 
 	/* for cyclic capability */
 	bool cyclic;
+
+	/* for runtime pm tracking */
+	bool active;
 };
 
 struct pl330_dmac {
@@ -2033,6 +2036,7 @@ static void pl330_tasklet(unsigned long data)
 		_stop(pch->thread);
 		spin_unlock(&pch->thread->dmac->lock);
 		power_down = true;
+		pch->active = false;
 	} else {
 		/* Make sure the PL330 Channel thread is active */
 		spin_lock(&pch->thread->dmac->lock);
@@ -2052,6 +2056,7 @@ static void pl330_tasklet(unsigned long data)
 			desc->status = PREP;
 			list_move_tail(&desc->node, &pch->work_list);
 			if (power_down) {
+				pch->active = true;
 				spin_lock(&pch->thread->dmac->lock);
 				_start(pch->thread);
 				spin_unlock(&pch->thread->dmac->lock);
@@ -2166,6 +2171,7 @@ static int pl330_terminate_all(struct dma_chan *chan)
 	unsigned long flags;
 	struct pl330_dmac *pl330 = pch->dmac;
 	LIST_HEAD(list);
+	bool power_down = false;
 
 	pm_runtime_get_sync(pl330->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
@@ -2176,6 +2182,8 @@ static int pl330_terminate_all(struct dma_chan *chan)
 	pch->thread->req[0].desc = NULL;
 	pch->thread->req[1].desc = NULL;
 	pch->thread->req_running = -1;
+	power_down = pch->active;
+	pch->active = false;
 
 	/* Mark all desc done */
 	list_for_each_entry(desc, &pch->submitted_list, node) {
@@ -2193,6 +2201,8 @@ static int pl330_terminate_all(struct dma_chan *chan)
 	list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
 	spin_unlock_irqrestore(&pch->lock, flags);
 	pm_runtime_mark_last_busy(pl330->ddma.dev);
+	if (power_down)
+		pm_runtime_put_autosuspend(pl330->ddma.dev);
 	pm_runtime_put_autosuspend(pl330->ddma.dev);
 
 	return 0;
@@ -2357,6 +2367,7 @@ static void pl330_issue_pending(struct dma_chan *chan)
 		 * updated on work_list emptiness status.
 		 */
 		WARN_ON(list_empty(&pch->submitted_list));
+		pch->active = true;
 		pm_runtime_get_sync(pch->dmac->ddma.dev);
 	}
 	list_splice_tail_init(&pch->submitted_list, &pch->work_list);

From 7165b8ad36f8bda42a5a8aa059b9a5071acc2210 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Mon, 19 Dec 2016 15:30:12 +0100
Subject: [PATCH 040/258] mtd: nand: tango: Update DT binding description

Visually separate register ranges (address/size pairs) in reg prop.
Change DMA channel name, for consistency with other drivers.

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 Documentation/devicetree/bindings/mtd/tango-nand.txt | 6 +++---
 drivers/mtd/nand/tango_nand.c                        | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/tango-nand.txt b/Documentation/devicetree/bindings/mtd/tango-nand.txt
index ad5a02f2ac8c..cd1bf2ac9055 100644
--- a/Documentation/devicetree/bindings/mtd/tango-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/tango-nand.txt
@@ -5,7 +5,7 @@ Required properties:
 - compatible: "sigma,smp8758-nand"
 - reg: address/size of nfc_reg, nfc_mem, and pbus_reg
 - dmas: reference to the DMA channel used by the controller
-- dma-names: "nfc_sbox"
+- dma-names: "rxtx"
 - clocks: reference to the system clock
 - #address-cells: <1>
 - #size-cells: <0>
@@ -17,9 +17,9 @@ Example:
 
 	nandc: nand-controller@2c000 {
 		compatible = "sigma,smp8758-nand";
-		reg = <0x2c000 0x30 0x2d000 0x800 0x20000 0x1000>;
+		reg = <0x2c000 0x30>, <0x2d000 0x800>, <0x20000 0x1000>;
 		dmas = <&dma0 3>;
-		dma-names = "nfc_sbox";
+		dma-names = "rxtx";
 		clocks = <&clkgen SYS_CLK>;
 		#address-cells = <1>;
 		#size-cells = <0>;
diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 28c7f474be77..78a65f672c87 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -636,7 +636,7 @@ static int tango_nand_probe(struct platform_device *pdev)
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
-	nfc->chan = dma_request_chan(&pdev->dev, "nfc_sbox");
+	nfc->chan = dma_request_chan(&pdev->dev, "rxtx");
 	if (IS_ERR(nfc->chan))
 		return PTR_ERR(nfc->chan);
 

From 8043d25b3c0fa0a8f531333707f682f03b6febdb Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Tue, 3 Jan 2017 11:01:14 +0100
Subject: [PATCH 041/258] mtd: nand: tango: Reset pbus to raw mode in probe

Linux should not expect the boot loader to properly configure the
peripheral bus "pad mode", so reset PBUS_PAD_MODE to raw.

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/tango_nand.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 78a65f672c87..4a5e948c62df 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -632,6 +632,8 @@ static int tango_nand_probe(struct platform_device *pdev)
 	if (IS_ERR(nfc->pbus_base))
 		return PTR_ERR(nfc->pbus_base);
 
+	writel_relaxed(MODE_RAW, nfc->pbus_base + PBUS_PAD_MODE);
+
 	clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);

From b66fb1da5a8cac3f5c3cdbe41937c91efc4e76a4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 3 Jan 2017 09:19:54 +0100
Subject: [PATCH 042/258] tools lib subcmd: Add OPT_STRING_OPTARG_SET option

To allow string options with a default argument and variable set when
the option is used.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1483431600-19887-2-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/subcmd/parse-options.c | 3 +++
 tools/lib/subcmd/parse-options.h | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 3284bb14ae78..8aad81151d50 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p,
 		else
 			err = get_arg(p, opt, flags, (const char **)opt->value);
 
+		if (opt->set)
+			*(bool *)opt->set = true;
+
 		/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
 		if (opt->flags & PARSE_OPT_NOEMPTY) {
 			const char *val = *(const char **)opt->value;
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 8866ac438b34..11c3be3bcce7 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -137,6 +137,11 @@ struct option {
 	{ .type = OPTION_STRING,  .short_name = (s), .long_name = (l), \
 	  .value = check_vtype(v, const char **), (a), .help = (h), \
 	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
+#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
+	{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+	  .value = check_vtype(v, const char **), (a), .help = (h), \
+	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
+	  .set = check_vtype(os, bool *)}
 #define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
 #define OPT_DATE(s, l, v, h) \
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }

From efd21307119d5a23ac83ae8fd5a39a5c7aeb8493 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 3 Jan 2017 09:19:55 +0100
Subject: [PATCH 043/258] perf record: Make __record_options static

There's no need for this one to be global.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1483431600-19887-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 74d6a035133a..31cf0ce12a65 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1405,7 +1405,7 @@ static bool dry_run;
  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
  * using pipes, etc.
  */
-struct option __record_options[] = {
+static struct option __record_options[] = {
 	OPT_CALLBACK('e', "event", &record.evlist, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events_option),

From 60437ac02f398e0ee0927748d4798dd5534ac90d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 3 Jan 2017 09:19:56 +0100
Subject: [PATCH 044/258] perf record: Fix --switch-output documentation and
 comment

There's no --signal-trigger option, also adding the code comment into
record man page.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1483431600-19887-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt | 4 ++++
 tools/perf/builtin-record.c              | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 27fc3617c6a4..5054d9147f0f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that
 particular perf.data snapshot should be kept or not.
 
 Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+The reason for the latter two is to reduce the data file switching
+overhead. You can still switch them on with:
+
+  --switch-output --no-no-buildid  --no-no-buildid-cache
 
 --dry-run::
 Parse options then exit. --dry-run can be used to detect errors in cmdline
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 31cf0ce12a65..4ec10e9427d9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 		 * overhead. Still generate buildid if they are required
 		 * explicitly using
 		 *
-		 *  perf record --signal-trigger --no-no-buildid \
+		 *  perf record --switch-output --no-no-buildid \
 		 *              --no-no-buildid-cache
 		 *
 		 * Following code equals to:

From aa7c8da35d1905d80e840d075f07d26ec90144b5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 20 Dec 2016 13:28:27 -0500
Subject: [PATCH 045/258] btrfs: fix error handling when run_delayed_extent_op
 fails

In __btrfs_run_delayed_refs, the error path when run_delayed_extent_op
fails sets locked_ref->processing = 0 but doesn't re-increment
delayed_refs->num_heads_ready.  As a result, we end up triggering
the WARN_ON in btrfs_select_ref_head.

Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads)
Reported-by: Jon Nelson <jnelson-suse@jamponi.net>
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e97302f437a1..5366e50c84c6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2572,7 +2572,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 					 */
 					if (must_insert_reserved)
 						locked_ref->must_insert_reserved = 1;
+					spin_lock(&delayed_refs->lock);
 					locked_ref->processing = 0;
+					delayed_refs->num_heads_ready++;
+					spin_unlock(&delayed_refs->lock);
 					btrfs_debug(fs_info,
 						    "run_delayed_extent_op returned %d",
 						    ret);

From d0280996437081dd12ed1e982ac8aeaa62835ec4 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 20 Dec 2016 13:28:28 -0500
Subject: [PATCH 046/258] btrfs: fix locking when we put back a delayed ref
 that's too new

In __btrfs_run_delayed_refs, when we put back a delayed ref that's too
new, we have already dropped the lock on locked_ref when we set
->processing = 0.

This patch keeps the lock to cover that assignment.

Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads)
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5366e50c84c6..ac7e6713033c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2522,11 +2522,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		if (ref && ref->seq &&
 		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
 			spin_unlock(&locked_ref->lock);
-			btrfs_delayed_ref_unlock(locked_ref);
 			spin_lock(&delayed_refs->lock);
 			locked_ref->processing = 0;
 			delayed_refs->num_heads_ready++;
 			spin_unlock(&delayed_refs->lock);
+			btrfs_delayed_ref_unlock(locked_ref);
 			locked_ref = NULL;
 			cond_resched();
 			count++;

From e321f8a801d7b4c40da8005257b05b9c2b51b072 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 30 Nov 2016 16:11:04 -0800
Subject: [PATCH 047/258] Btrfs: use down_read_nested to make lockdep silent

If @block_group is not @used_bg, it'll try to get @used_bg's lock without
droping @block_group 's lock and lockdep has throwed a scary deadlock warning
about it.
Fix it by using down_read_nested.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ac7e6713033c..dcd2e798767e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7387,7 +7387,8 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
 
 		spin_unlock(&cluster->refill_lock);
 
-		down_read(&used_bg->data_rwsem);
+		/* We should only have one-level nested. */
+		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
 
 		spin_lock(&cluster->refill_lock);
 		if (used_bg == cluster->block_group)

From 781feef7e6befafd4d9787d1f7ada1f9ccd504e4 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 30 Nov 2016 16:20:25 -0800
Subject: [PATCH 048/258] Btrfs: fix lockdep warning about log_mutex

While checking INODE_REF/INODE_EXTREF for a corner case, we may acquire a
different inode's log_mutex with holding the current inode's log_mutex, and
lockdep has complained this with a possilble deadlock warning.

Fix this by using mutex_lock_nested() when processing the other inode's
log_mutex.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f10bf5213ed8..eeffff84f280 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -37,6 +37,7 @@
  */
 #define LOG_INODE_ALL 0
 #define LOG_INODE_EXISTS 1
+#define LOG_OTHER_INODE 2
 
 /*
  * directory trouble cases
@@ -4641,7 +4642,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	if (S_ISDIR(inode->i_mode) ||
 	    (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 		       &BTRFS_I(inode)->runtime_flags) &&
-	     inode_only == LOG_INODE_EXISTS))
+	     inode_only >= LOG_INODE_EXISTS))
 		max_key.type = BTRFS_XATTR_ITEM_KEY;
 	else
 		max_key.type = (u8)-1;
@@ -4665,7 +4666,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	mutex_lock(&BTRFS_I(inode)->log_mutex);
+	if (inode_only == LOG_OTHER_INODE) {
+		inode_only = LOG_INODE_EXISTS;
+		mutex_lock_nested(&BTRFS_I(inode)->log_mutex,
+				  SINGLE_DEPTH_NESTING);
+	} else {
+		mutex_lock(&BTRFS_I(inode)->log_mutex);
+	}
 
 	/*
 	 * a brute force approach to making sure we get the most uptodate
@@ -4817,7 +4824,7 @@ again:
 				 * unpin it.
 				 */
 				err = btrfs_log_inode(trans, root, other_inode,
-						      LOG_INODE_EXISTS,
+						      LOG_OTHER_INODE,
 						      0, LLONG_MAX, ctx);
 				iput(other_inode);
 				if (err)

From c2931667c83ded6504b3857e99cc45b21fa496fb Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 22 Dec 2016 17:13:54 -0800
Subject: [PATCH 049/258] Btrfs: adjust outstanding_extents counter properly
 when dio write is split

Currently how btrfs dio deals with split dio write is not good
enough if dio write is split into several segments due to the
lack of contiguous space, a large dio write like 'dd bs=1G count=1'
can end up with incorrect outstanding_extents counter and endio
would complain loudly with an assertion.

This fixes the problem by compensating the outstanding_extents
counter in inode if a large dio write gets split.

Reported-by: Anand Jain <anand.jain@oracle.com>
Tested-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a713d9d324b0..81b9d9d0450c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7623,11 +7623,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
 	 * within our reservation, otherwise we need to adjust our inode
 	 * counter appropriately.
 	 */
-	if (dio_data->outstanding_extents) {
+	if (dio_data->outstanding_extents >= num_extents) {
 		dio_data->outstanding_extents -= num_extents;
 	} else {
+		/*
+		 * If dio write length has been split due to no large enough
+		 * contiguous space, we need to compensate our inode counter
+		 * appropriately.
+		 */
+		u64 num_needed = num_extents - dio_data->outstanding_extents;
+
 		spin_lock(&BTRFS_I(inode)->lock);
-		BTRFS_I(inode)->outstanding_extents += num_extents;
+		BTRFS_I(inode)->outstanding_extents += num_needed;
 		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 }

From 3b046a97cbd35a73e1eef968dbfb1a0aac745a77 Mon Sep 17 00:00:00 2001
From: Robert LeBlanc <robert@leblancnet.us>
Date: Mon, 5 Dec 2016 13:02:57 -0700
Subject: [PATCH 050/258] md/raid1: Refactor raid1_make_request

Refactor raid1_make_request to make read and write code in their own
functions to clean up the code.

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid1.c | 267 +++++++++++++++++++++++----------------------
 1 file changed, 139 insertions(+), 128 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a1f3fbed9100..14422407e520 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1066,17 +1066,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+				 struct r1bio *r1_bio)
 {
 	struct r1conf *conf = mddev->private;
 	struct raid1_info *mirror;
-	struct r1bio *r1_bio;
 	struct bio *read_bio;
+	struct bitmap *bitmap = mddev->bitmap;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	int rdisk;
+
+	wait_barrier(conf, bio);
+
+read_again:
+	rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+	if (rdisk < 0) {
+		/* couldn't find anywhere to read from */
+		raid_end_bio_io(r1_bio);
+		return;
+	}
+	mirror = conf->mirrors + rdisk;
+
+	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+	    bitmap) {
+		/*
+		 * Reading from a write-mostly device must take care not to
+		 * over-take any writes that are 'behind'
+		 */
+		raid1_log(mddev, "wait behind writes");
+		wait_event(bitmap->behind_wait,
+			   atomic_read(&bitmap->behind_writes) == 0);
+	}
+	r1_bio->read_disk = rdisk;
+	r1_bio->start_next_window = 0;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r1_bio->bios[rdisk] = read_bio;
+
+	read_bio->bi_iter.bi_sector = r1_bio->sector +
+		mirror->rdev->data_offset;
+	read_bio->bi_bdev = mirror->rdev->bdev;
+	read_bio->bi_end_io = raid1_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &mirror->rdev->flags) &&
+	    test_bit(R1BIO_FailFast, &r1_bio->state))
+	        read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r1_bio;
+
+	if (mddev->gendisk)
+	        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+	                              read_bio, disk_devt(mddev->gendisk),
+	                              r1_bio->sector);
+
+	if (max_sectors < r1_bio->sectors) {
+		/*
+		 * could not read all from this device, so we will need another
+		 * r1_bio.
+		 */
+		sectors_handled = (r1_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r1_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __make_request and subsequent mempool_alloc might
+		 * block waiting for it.  So hand bio over to raid1d.
+		 */
+		reschedule_retry(r1_bio);
+
+		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+		r1_bio->master_bio = bio;
+		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r1_bio->state = 0;
+		r1_bio->mddev = mddev;
+		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+				struct r1bio *r1_bio)
+{
+	struct r1conf *conf = mddev->private;
 	int i, disks;
-	struct bitmap *bitmap;
+	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_opf &
 						(REQ_PREFLUSH | REQ_FUA));
@@ -1096,15 +1186,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	if (bio_data_dir(bio) == WRITE &&
-	    ((bio_end_sector(bio) > mddev->suspend_lo &&
+	if ((bio_end_sector(bio) > mddev->suspend_lo &&
 	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
 	    (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-		     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
-		/* As the suspend_* range is controlled by
-		 * userspace, we want an interruptible
-		 * wait.
+		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+
+		/*
+		 * As the suspend_* range is controlled by userspace, we want
+		 * an interruptible wait.
 		 */
 		DEFINE_WAIT(w);
 		for (;;) {
@@ -1115,128 +1205,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
 			    (mddev_is_clustered(mddev) &&
 			     !md_cluster_ops->area_resyncing(mddev, WRITE,
-				     bio->bi_iter.bi_sector, bio_end_sector(bio))))
+				     bio->bi_iter.bi_sector,
+				     bio_end_sector(bio))))
 				break;
 			schedule();
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
-
 	start_next_window = wait_barrier(conf, bio);
 
-	bitmap = mddev->bitmap;
-
-	/*
-	 * make_request() can abort the operation when read-ahead is being
-	 * used and no empty request is available.
-	 *
-	 */
-	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-	r1_bio->master_bio = bio;
-	r1_bio->sectors = bio_sectors(bio);
-	r1_bio->state = 0;
-	r1_bio->mddev = mddev;
-	r1_bio->sector = bio->bi_iter.bi_sector;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r1_bio and no locking
-	 * will be needed when requests complete.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		int rdisk;
-
-read_again:
-		rdisk = read_balance(conf, r1_bio, &max_sectors);
-
-		if (rdisk < 0) {
-			/* couldn't find anywhere to read from */
-			raid_end_bio_io(r1_bio);
-			return;
-		}
-		mirror = conf->mirrors + rdisk;
-
-		if (test_bit(WriteMostly, &mirror->rdev->flags) &&
-		    bitmap) {
-			/* Reading from a write-mostly device must
-			 * take care not to over-take any writes
-			 * that are 'behind'
-			 */
-			raid1_log(mddev, "wait behind writes");
-			wait_event(bitmap->behind_wait,
-				   atomic_read(&bitmap->behind_writes) == 0);
-		}
-		r1_bio->read_disk = rdisk;
-		r1_bio->start_next_window = 0;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r1_bio->bios[rdisk] = read_bio;
-
-		read_bio->bi_iter.bi_sector = r1_bio->sector +
-			mirror->rdev->data_offset;
-		read_bio->bi_bdev = mirror->rdev->bdev;
-		read_bio->bi_end_io = raid1_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &mirror->rdev->flags) &&
-		    test_bit(R1BIO_FailFast, &r1_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r1_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r1_bio->sector);
-
-		if (max_sectors < r1_bio->sectors) {
-			/* could not read all from this device, so we will
-			 * need another r1_bio.
-			 */
-
-			sectors_handled = (r1_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r1_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __make_request
-			 * and subsequent mempool_alloc might block waiting
-			 * for it.  So hand bio over to raid1d.
-			 */
-			reschedule_retry(r1_bio);
-
-			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-			r1_bio->master_bio = bio;
-			r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r1_bio->state = 0;
-			r1_bio->mddev = mddev;
-			r1_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid1_log(mddev, "wait queued");
@@ -1280,8 +1257,7 @@ read_again:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, r1_bio->sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* mustn't write here until the bad block is
@@ -1370,7 +1346,8 @@ read_again:
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+			 max_sectors);
 
 		if (first_clone) {
 			/* do behind I/O ?
@@ -1464,6 +1441,40 @@ read_again:
 	wake_up(&conf->wait_barrier);
 }
 
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r1conf *conf = mddev->private;
+	struct r1bio *r1_bio;
+
+	/*
+	 * make_request() can abort the operation when read-ahead is being
+	 * used and no empty request is available.
+	 *
+	 */
+	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+	r1_bio->master_bio = bio;
+	r1_bio->sectors = bio_sectors(bio);
+	r1_bio->state = 0;
+	r1_bio->mddev = mddev;
+	r1_bio->sector = bio->bi_iter.bi_sector;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments.  If this is 0, there is only one r1_bio and
+	 * no locking will be needed when requests complete.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid1_read_request(mddev, bio, r1_bio);
+	else
+		raid1_write_request(mddev, bio, r1_bio);
+}
+
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r1conf *conf = mddev->private;

From bb5f1ed70bc3bbbce510907da3432dab267ff508 Mon Sep 17 00:00:00 2001
From: Robert LeBlanc <robert@leblancnet.us>
Date: Mon, 5 Dec 2016 13:02:58 -0700
Subject: [PATCH 051/258] md/raid10: Refactor raid10_make_request

Refactor raid10_make_request into seperate read and write functions to
clean up the code.

Shaohua: add the recovery check back to read path

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 245 +++++++++++++++++++++++++-------------------
 1 file changed, 140 insertions(+), 105 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab5e86209322..1920756828df 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	sector_t sectors;
+	struct md_rdev *rdev;
+	int slot;
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
+		 */
+		raid10_log(conf->mddev, "wait reshape");
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+
+read_again:
+	rdev = read_balance(conf, r10_bio, &max_sectors);
+	if (!rdev) {
+		raid_end_bio_io(r10_bio);
+		return;
+	}
+	slot = r10_bio->read_slot;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r10_bio->devs[slot].bio = read_bio;
+	r10_bio->devs[slot].rdev = rdev;
+
+	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+		choose_data_offset(r10_bio, rdev);
+	read_bio->bi_bdev = rdev->bdev;
+	read_bio->bi_end_io = raid10_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &rdev->flags) &&
+	    test_bit(R10BIO_FailFast, &r10_bio->state))
+	        read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r10_bio;
+
+	if (mddev->gendisk)
+	        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+	                              read_bio, disk_devt(mddev->gendisk),
+	                              r10_bio->sector);
+	if (max_sectors < r10_bio->sectors) {
+		/*
+		 * Could not read all from this device, so we will need another
+		 * r10_bio.
+		 */
+		sectors_handled = (r10_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __generic_make_request and subsequent
+		 * mempool_alloc might block waiting for it.  so hand bio over
+		 * to raid10d.
+		 */
+		reschedule_retry(r10_bio);
+
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r10_bio->state = 0;
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+	return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
 	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
 	struct blk_plug_cb *cb;
 	struct raid10_plug_cb *plug = NULL;
+	sector_t sectors;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
 
 	md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
 	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position.  Need to wait for
-		 * reshape to pass
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
 		 */
 		raid10_log(conf->mddev, "wait reshape");
 		allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			   sectors);
 		wait_barrier(conf);
 	}
+
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
 	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
 		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		conf->reshape_safe = mddev->reshape_position;
 	}
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
-
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		struct md_rdev *rdev;
-		int slot;
-
-read_again:
-		rdev = read_balance(conf, r10_bio, &max_sectors);
-		if (!rdev) {
-			raid_end_bio_io(r10_bio);
-			return;
-		}
-		slot = r10_bio->read_slot;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r10_bio->devs[slot].bio = read_bio;
-		r10_bio->devs[slot].rdev = rdev;
-
-		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-			choose_data_offset(r10_bio, rdev);
-		read_bio->bi_bdev = rdev->bdev;
-		read_bio->bi_end_io = raid10_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &rdev->flags) &&
-		    test_bit(R10BIO_FailFast, &r10_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r10_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r10_bio->sector);
-		if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
-			 */
-			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r10_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it.  so hand bio over to raid10d.
-			 */
-			reschedule_retry(r10_bio);
-
-			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-			r10_bio->master_bio = bio;
-			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r10_bio->state = 0;
-			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						   r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
 	one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = bio_sectors(bio);
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments.  If this is 0, there is only one r10_bio and
+	 * no locking will be needed when the request completes.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid10_read_request(mddev, bio, r10_bio);
+	else
+		raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;

From 074859184d770824f4437dca716bdeb625ae8b1c Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@redhat.com>
Date: Tue, 3 Jan 2017 12:42:42 +0100
Subject: [PATCH 052/258] tools lib traceevent: Fix prev/next_prio for deadline
 tasks

Currently, the sched:sched_switch tracepoint reports deadline tasks with
priority -1. But when reading the trace via perf script I've got the
following output:

  # ./d & # (d is a deadline task, see [1])
  # perf record -e sched:sched_switch -a sleep 1
  # perf script
      ...
         swapper     0 [000]  2146.962441: sched:sched_switch: swapper/0:0 [120] R ==> d:2593 [4294967295]
               d  2593 [000]  2146.972472: sched:sched_switch: d:2593 [4294967295] R ==> g:2590 [4294967295]

The task d reports the wrong priority [4294967295]. This happens because
the "int prio" is stored in an unsigned long long val. Although it is
set as a %lld, as int is shorter than unsigned long long,
trace_seq_printf prints it as a positive number.

The fix is just to cast the val as an int, and print it as a %d,
as in the sched:sched_switch tracepoint's "format".

The output with the fix is:

  # ./d &
  # perf record -e sched:sched_switch -a sleep 1
  # perf script
      ...
         swapper     0 [000]  4306.374037: sched:sched_switch: swapper/0:0 [120] R ==> d:10941 [-1]
               d 10941 [000]  4306.383823: sched:sched_switch: d:10941 [-1] R ==> swapper/0:0 [120]

[1] d.c

 ---
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/types.h>
  #include <linux/sched.h>

  struct sched_attr {
	__u32 size, sched_policy;
	__u64 sched_flags;
	__s32 sched_nice;
	__u32 sched_priority;
	__u64 sched_runtime, sched_deadline, sched_period;
  };

  int sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags)
  {
	return syscall(__NR_sched_setattr, pid, attr, flags);
  }

  int main(void)
  {
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE, /* This creates a 10ms/30ms reservation */
		.sched_runtime	= 10 * 1000 * 1000,
		.sched_period	= attr.sched_deadline = 30 * 1000 * 1000,
	};

	if (sched_setattr(0, &attr, 0) < 0) {
		perror("sched_setattr");
		return -1;
	}

	for(;;);
  }
 ---

Committer notes:

Got the program from the provided URL, http://bristot.me/lkml/d.c,
trimmed it and included in the cset log above, so that we have
everything needed to test it in one place.

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/866ef75bcebf670ae91c6a96daa63597ba981f0d.1483443552.git.bristot@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/plugin_sched_switch.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
index f1ce60065258..ec30c2fcbac0 100644
--- a/tools/lib/traceevent/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s,
 	trace_seq_printf(s, "%lld ", val);
 
 	if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, "[%lld] ", val);
+		trace_seq_printf(s, "[%d] ", (int) val);
 
 	if (pevent_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
 		write_state(s, val);
@@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s,
 	trace_seq_printf(s, "%lld", val);
 
 	if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, " [%lld]", val);
+		trace_seq_printf(s, " [%d]", (int) val);
 
 	return 0;
 }

From 30a9c6444810429aa2b7cbfbd453ce339baaadbf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Jan 2017 12:03:59 -0300
Subject: [PATCH 053/258] perf tools: Install tools/lib/traceevent plugins with
 install-bin

Those are binaries as well, so should be installed by:

  make -C tools/perf install-bin'

too.

Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-3841b37u05evxrs1igkyu6ks@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index e9ec531131ca..4db68aec9913 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -704,9 +704,9 @@ install-tests: all install-gtk
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
 		$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
-install-bin: install-tools install-tests
+install-bin: install-tools install-tests install-traceevent-plugins
 
-install: install-bin try-install-man install-traceevent-plugins
+install: install-bin try-install-man
 
 install-python_ext:
 	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'

From 7934c98a6e04028eb34c1293bfb5a6b0ab630b66 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Jan 2017 15:19:21 -0300
Subject: [PATCH 054/258] perf symbols: Robustify reading of build-id from
 sysfs

Markus reported that perf segfaults when reading /sys/kernel/notes from
a kernel linked with GNU gold, due to what looks like a gold bug, so do
some bounds checking to avoid crashing in that case.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Report-Link: http://lkml.kernel.org/r/20161219161821.GA294@x4
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-ryhgs6a6jxvz207j2636w31c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol-elf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 99400b0e8f2a..adbc6c02c3aa 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
 				break;
 		} else {
 			int n = namesz + descsz;
+
+			if (n > (int)sizeof(bf)) {
+				n = sizeof(bf);
+				pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
+					 __func__, filename, nhdr.n_namesz, nhdr.n_descsz);
+			}
 			if (read(fd, bf, n) != n)
 				break;
 		}

From 47e3a5edc6538d66e470aaed3b7c57255cb37ca1 Mon Sep 17 00:00:00 2001
From: Paul Donohue <linux-kernel@PaulSD.com>
Date: Tue, 3 Jan 2017 10:39:28 -0800
Subject: [PATCH 055/258] Input: ALPS - fix TrackStick Y axis handling for SS5
 hardware

A minus character was lost in commit 23fce365, causing the Y axis to be
inverted for SS5 TrackStick events.  (Pushing the TrackStick up caused
the pointer to move down, and vice versa.)  Restore the lost minus.

Fixes: 23fce365c6a2 ("Input: ALPS - clean up code for SS5 hardware")
Signed-off-by: Paul Donohue <linux-kernel@PaulSD.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index cde6f4bd8ea2..6d279aa27cb9 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -114,7 +114,7 @@ enum SS4_PACKET_ID {
 				 (_b[1] & 0x7F)		\
 				)
 
-#define SS4_TS_Y_V2(_b)		(s8)(				\
+#define SS4_TS_Y_V2(_b)		-(s8)(				\
 				 ((_b[3] & 0x01) << 7) |	\
 				 (_b[2] & 0x7F)		\
 				)

From 01427fe7c4b956b878e55e966690624a3624e991 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 3 Jan 2017 11:51:48 -0800
Subject: [PATCH 056/258] Input: adxl34x - make it enumerable in ACPI
 environment

The ACPI-enabled platform may contain _DSD method to enable this driver
using compatible string.

Remove OF specifics to re-use existing code on ACPI-enabled platforms.

Suggested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/adxl34x-i2c.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c
index a8b0a2eec344..7fed92fb8cc1 100644
--- a/drivers/input/misc/adxl34x-i2c.c
+++ b/drivers/input/misc/adxl34x-i2c.c
@@ -136,7 +136,6 @@ static const struct i2c_device_id adxl34x_id[] = {
 
 MODULE_DEVICE_TABLE(i2c, adxl34x_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id adxl34x_of_id[] = {
 	/*
 	 * The ADXL346 is backward-compatible with the ADXL345. Differences are
@@ -153,13 +152,12 @@ static const struct of_device_id adxl34x_of_id[] = {
 };
 
 MODULE_DEVICE_TABLE(of, adxl34x_of_id);
-#endif
 
 static struct i2c_driver adxl34x_driver = {
 	.driver = {
 		.name = "adxl34x",
 		.pm = &adxl34x_i2c_pm,
-		.of_match_table = of_match_ptr(adxl34x_of_id),
+		.of_match_table = adxl34x_of_id,
 	},
 	.probe    = adxl34x_i2c_probe,
 	.remove   = adxl34x_i2c_remove,

From eebc509b20881b92d62e317b2c073e57c5f200f0 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 4 Jan 2017 12:29:05 +0900
Subject: [PATCH 057/258] perf probe: Fix --funcs to show correct symbols for
 offline module

Fix --funcs (-F) option to show correct symbols for offline module.
Since previous perf-probe uses machine__findnew_module_map() for offline
module, even if user passes a module file (with full path) which is for
other architecture, perf-probe always tries to load symbol map for
current kernel module.

This fix uses dso__new_map() to load the map from given binary as same
as a map for user applications.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/148350053478.19001.15435255244512631545.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8f810961ec78..542e6472c4d7 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module)
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
-		return machine__findnew_module_map(host_machine, 0, module);
+		return dso__new_map(module);
 
 	if (!module)
 		module = "kernel";
@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module)
 		if (strncmp(pos->dso->short_name + 1, module,
 			    pos->dso->short_name_len - 2) == 0 &&
 		    module[pos->dso->short_name_len - 2] == '\0') {
+			map__get(pos);
 			return pos;
 		}
 	}
@@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user)
 		return kernel_get_module_map(target);
 }
 
-static void put_target_map(struct map *map, bool user)
-{
-	if (map && user) {
-		/* Only the user map needs to be released */
-		map__put(map);
-	}
-}
-
-
 static int convert_exec_to_group(const char *exec, char **result)
 {
 	char *ptr1, *ptr2, *exec_copy;
@@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
 	}
 
 out:
-	put_target_map(map, uprobes);
+	map__put(map);
 	return ret;
 
 }
@@ -2869,7 +2861,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	}
 
 out:
-	put_target_map(map, pev->uprobes);
+	map__put(map);
 	free(syms);
 	return ret;
 
@@ -3362,10 +3354,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
 		return ret;
 
 	/* Get a symbol map */
-	if (user)
-		map = dso__new_map(target);
-	else
-		map = kernel_get_module_map(target);
+	map = get_target_map(target, user);
 	if (!map) {
 		pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
 		return -EINVAL;
@@ -3397,9 +3386,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
         }
 
 end:
-	if (user) {
-		map__put(map);
-	}
+	map__put(map);
 	exit_probe_symbol_maps();
 
 	return ret;

From 8a937a25a7e3c19d5fb3f9d92f605cf5fda219d8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 4 Jan 2017 12:30:19 +0900
Subject: [PATCH 058/258] perf probe: Fix to probe on gcc generated symbols for
 offline kernel

Fix perf-probe to show probe definition on gcc generated symbols for
offline kernel (including cross-arch kernel image).

gcc sometimes optimizes functions and generate new symbols with suffixes
such as ".constprop.N" or ".isra.N" etc. Since those symbol names are
not recorded in DWARF, we have to find correct generated symbols from
offline ELF binary to probe on it (kallsyms doesn't correct it).  For
online kernel or uprobes we don't need it because those are rebased on
_text, or a section relative address.

E.g. Without this:

  $ perf probe -k build-arm/vmlinux -F __slab_alloc*
  __slab_alloc.constprop.9
  $ perf probe -k build-arm/vmlinux -D __slab_alloc
  p:probe/__slab_alloc __slab_alloc+0

If you put above definition on target machine, it should fail
because there is no __slab_alloc in kallsyms.

With this fix, perf probe shows correct probe definition on
__slab_alloc.constprop.9:

  $ perf probe -k build-arm/vmlinux -D __slab_alloc
  p:probe/__slab_alloc __slab_alloc.constprop.9+0

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/148350060434.19001.11864836288580083501.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 48 ++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 542e6472c4d7..4a57c8a60bd9 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -610,6 +610,51 @@ error:
 	return ret ? : -ENOENT;
 }
 
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generate new symbols with suffixes such as .constprop.N or .isra.N
+ * etc. Since those symbols are not recorded in DWARF, we have to find
+ * correct generated symbols from offline ELF binary.
+ * For online kernel or uprobes we don't need this because those are
+ * rebased on _text, or already a section relative address.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+					int ntevs, const char *pathname)
+{
+	struct symbol *sym;
+	struct map *map;
+	unsigned long stext = 0;
+	u64 addr;
+	int i;
+
+	/* Prepare a map for offline binary */
+	map = dso__new_map(pathname);
+	if (!map || get_text_start_address(pathname, &stext) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", pathname);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ntevs; i++) {
+		addr = tevs[i].point.address + tevs[i].point.offset - stext;
+		sym = map__find_symbol(map, addr);
+		if (!sym)
+			continue;
+		if (!strcmp(sym->name, tevs[i].point.symbol))
+			continue;
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = strdup(sym->name);
+		tevs[i].point.offset = addr - sym->start;
+	}
+	map__put(map);
+
+	return 0;
+}
+
 static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
 					  int ntevs, const char *exec)
 {
@@ -671,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
 
 	/* Skip post process if the target is an offline kernel */
 	if (symbol_conf.ignore_vmlinux_buildid)
-		return 0;
+		return post_process_offline_probe_trace_events(tevs, ntevs,
+						symbol_conf.vmlinux_name);
 
 	reloc_sym = kernel_get_ref_reloc_sym();
 	if (!reloc_sym) {

From 4ee437fbf626b5ad756889d8bc0fcead3d66dde7 Mon Sep 17 00:00:00 2001
From: Caleb Crome <caleb@crome.org>
Date: Tue, 3 Jan 2017 10:22:57 -0800
Subject: [PATCH 059/258] ASoC: fsl_ssi: set fifo watermark to more reliable
 value

The fsl_ssi fifo watermark is by default set to 2 free spaces (i.e.
activate DMA on FIFO when only 2 spaces are left.)  This means the
DMA must service the fifo within 2 audio samples, which is just not
enough time  for many use cases with high data rate.  In many
configurations the audio channel slips (causing l/r swap in stereo
configurations, or channel slipping in multi-channel configurations).

This patch gives more breathing room and allows the SSI to operate
reliably by changing the fifio refill watermark to 8.

There is no change in behavior for older chips (with an 8-deep fifo).
Only the newer chips with a 15-deep fifo get the new behavior. I
suspect a new fifo depth setting could be optimized on the older
chips too, but I have not tested.

Signed-off-by: Caleb Crome <caleb@crome.org>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/fsl_ssi.c | 74 +++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 21 deletions(-)

diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 50349437d961..fde08660b63b 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -224,6 +224,12 @@ struct fsl_ssi_soc_data {
  * @dbg_stats: Debugging statistics
  *
  * @soc: SoC specific data
+ *
+ * @fifo_watermark: the FIFO watermark setting.  Notifies DMA when
+ *             there are @fifo_watermark or fewer words in TX fifo or
+ *             @fifo_watermark or more empty words in RX fifo.
+ * @dma_maxburst: max number of words to transfer in one go.  So far,
+ *             this is always the same as fifo_watermark.
  */
 struct fsl_ssi_private {
 	struct regmap *regs;
@@ -263,6 +269,9 @@ struct fsl_ssi_private {
 
 	const struct fsl_ssi_soc_data *soc;
 	struct device *dev;
+
+	u32 fifo_watermark;
+	u32 dma_maxburst;
 };
 
 /*
@@ -1051,21 +1060,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev,
 	regmap_write(regs, CCSR_SSI_SRCR, srcr);
 	regmap_write(regs, CCSR_SSI_SCR, scr);
 
-	/*
-	 * Set the watermark for transmit FIFI 0 and receive FIFO 0. We don't
-	 * use FIFO 1. We program the transmit water to signal a DMA transfer
-	 * if there are only two (or fewer) elements left in the FIFO. Two
-	 * elements equals one frame (left channel, right channel). This value,
-	 * however, depends on the depth of the transmit buffer.
-	 *
-	 * We set the watermark on the same level as the DMA burstsize.  For
-	 * fiq it is probably better to use the biggest possible watermark
-	 * size.
-	 */
-	if (ssi_private->use_dma)
-		wm = ssi_private->fifo_depth - 2;
-	else
-		wm = ssi_private->fifo_depth;
+	wm = ssi_private->fifo_watermark;
 
 	regmap_write(regs, CCSR_SSI_SFCSR,
 			CCSR_SSI_SFCSR_TFWM0(wm) | CCSR_SSI_SFCSR_RFWM0(wm) |
@@ -1373,12 +1368,8 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev,
 		dev_dbg(&pdev->dev, "could not get baud clock: %ld\n",
 			 PTR_ERR(ssi_private->baudclk));
 
-	/*
-	 * We have burstsize be "fifo_depth - 2" to match the SSI
-	 * watermark setting in fsl_ssi_startup().
-	 */
-	ssi_private->dma_params_tx.maxburst = ssi_private->fifo_depth - 2;
-	ssi_private->dma_params_rx.maxburst = ssi_private->fifo_depth - 2;
+	ssi_private->dma_params_tx.maxburst = ssi_private->dma_maxburst;
+	ssi_private->dma_params_rx.maxburst = ssi_private->dma_maxburst;
 	ssi_private->dma_params_tx.addr = ssi_private->ssi_phys + CCSR_SSI_STX0;
 	ssi_private->dma_params_rx.addr = ssi_private->ssi_phys + CCSR_SSI_SRX0;
 
@@ -1543,6 +1534,47 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                 /* Older 8610 DTs didn't have the fifo-depth property */
 		ssi_private->fifo_depth = 8;
 
+	/*
+	 * Set the watermark for transmit FIFO 0 and receive FIFO 0. We don't
+	 * use FIFO 1 but set the watermark appropriately nontheless.
+	 * We program the transmit water to signal a DMA transfer
+	 * if there are N elements left in the FIFO. For chips with 15-deep
+	 * FIFOs, set watermark to 8.  This allows the SSI to operate at a
+	 * high data rate without channel slipping. Behavior is unchanged
+	 * for the older chips with a fifo depth of only 8.  A value of 4
+	 * might be appropriate for the older chips, but is left at
+	 * fifo_depth-2 until sombody has a chance to test.
+	 *
+	 * We set the watermark on the same level as the DMA burstsize.  For
+	 * fiq it is probably better to use the biggest possible watermark
+	 * size.
+	 */
+	switch (ssi_private->fifo_depth) {
+	case 15:
+		/*
+		 * 2 samples is not enough when running at high data
+		 * rates (like 48kHz @ 16 bits/channel, 16 channels)
+		 * 8 seems to split things evenly and leave enough time
+		 * for the DMA to fill the FIFO before it's over/under
+		 * run.
+		 */
+		ssi_private->fifo_watermark = 8;
+		ssi_private->dma_maxburst = 8;
+		break;
+	case 8:
+	default:
+		/*
+		 * maintain old behavior for older chips.
+		 * Keeping it the same because I don't have an older
+		 * board to test with.
+		 * I suspect this could be changed to be something to
+		 * leave some more space in the fifo.
+		 */
+		ssi_private->fifo_watermark = ssi_private->fifo_depth - 2;
+		ssi_private->dma_maxburst = ssi_private->fifo_depth - 2;
+		break;
+	}
+
 	dev_set_drvdata(&pdev->dev, ssi_private);
 
 	if (ssi_private->soc->imx) {

From cf9e1672a66c49ed8903c01b4c380a2f2dc91b40 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Mon, 5 Dec 2016 03:47:10 +0200
Subject: [PATCH 060/258] mtd: nand: lpc32xx: fix invalid error handling of a
 requested irq

Semantics of NR_IRQS is different on machines with SPARSE_IRQ option
disabled or enabled, in the latter case IRQs are allocated starting
at least from the value specified by NR_IRQS and going upwards, so
the check of (irq >= NR_IRQ) to decide about an error code returned by
platform_get_irq() is completely invalid, don't attempt to overrule
irq subsystem in the driver.

The change fixes LPC32xx NAND MLC driver initialization on boot.

Fixes: 8cb17b5ed017 ("irqchip: Add LPC32xx interrupt controller driver")
Cc: stable@kernel.org # v4.7+
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Acked-by: Sylvain Lemieux <slemieux.tyco@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/lpc32xx_mlc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c
index 5553a5d9efd1..846a66c1b133 100644
--- a/drivers/mtd/nand/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/lpc32xx_mlc.c
@@ -775,7 +775,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	init_completion(&host->comp_controller);
 
 	host->irq = platform_get_irq(pdev, 0);
-	if ((host->irq < 0) || (host->irq >= NR_IRQS)) {
+	if (host->irq < 0) {
 		dev_err(&pdev->dev, "failed to get platform irq\n");
 		res = -EINVAL;
 		goto err_exit3;

From dd853fd216d1485ed3045ff772079cc8689a9a4a Mon Sep 17 00:00:00 2001
From: Lukasz Odzioba <lukasz.odzioba@intel.com>
Date: Wed, 28 Dec 2016 14:55:40 +0100
Subject: [PATCH 061/258] x86/cpu: Fix bootup crashes by sanitizing the
 argument of the 'clearcpuid=' command-line option

A negative number can be specified in the cmdline which will be used as
setup_clear_cpu_cap() argument. With that we can clear/set some bit in
memory predceeding boot_cpu_data/cpu_caps_cleared which may cause kernel
to misbehave. This patch adds lower bound check to setup_disablecpuid().

Boris Petkov reproduced a crash:

  [    1.234575] BUG: unable to handle kernel paging request at ffffffff858bd540
  [    1.236535] IP: memcpy_erms+0x6/0x10

Signed-off-by: Lukasz Odzioba <lukasz.odzioba@intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: andi.kleen@intel.com
Cc: bp@alien8.de
Cc: dave.hansen@linux.intel.com
Cc: luto@kernel.org
Cc: slaoub@gmail.com
Fixes: ac72e7888a61 ("x86: add generic clearcpuid=... option")
Link: http://lkml.kernel.org/r/1482933340-11857-1-git-send-email-lukasz.odzioba@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dc1697ca5191..9bab7a8a4293 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1221,7 +1221,7 @@ static __init int setup_disablecpuid(char *arg)
 {
 	int bit;
 
-	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+	if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
 		setup_clear_cpu_cap(bit);
 	else
 		return 0;

From 754c73cf4d2463022b2c9ae208026bf22564ed06 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 2 Jan 2017 11:22:29 +0200
Subject: [PATCH 062/258] x86/cpu: Fix typo in the comment for Anniedale

The proper spelling of Anniedale SoC with 'e' in the middle. Fix typo in the
comment line in intel-family.h header.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170102092229.87036-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/intel-family.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 34a46dc076d3..8167fdb67ae8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -57,7 +57,7 @@
 #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
 #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
 #define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
 

From 159d3726db12b3476bc59ea0ab0a702103d466b5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 2 Jan 2017 11:24:50 +0200
Subject: [PATCH 063/258] x86/platform/intel-mid: Rename 'spidev' to
 'mrfld_spidev'

The current implementation supports only Intel Merrifield platforms. Don't mess
with the rest of the Intel MID family by not registering device with wrong
properties.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170102092450.87229-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-mid/device_libs/Makefile              | 2 +-
 .../{platform_spidev.c => platform_mrfld_spidev.c}            | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)
 rename arch/x86/platform/intel-mid/device_libs/{platform_spidev.c => platform_mrfld_spidev.c} (91%)

diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 61b5ed2b7d40..90e4f2a6625b 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
 obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o
 obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
 # SPI Devices
-obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o
+obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o
 # I2C Devices
 obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
 obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
similarity index 91%
rename from arch/x86/platform/intel-mid/device_libs/platform_spidev.c
rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
index 30c601b399ee..27186ad654c9 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
@@ -11,6 +11,7 @@
  * of the License.
  */
 
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/sfi.h>
 #include <linux/spi/pxa2xx_spi.h>
@@ -34,6 +35,9 @@ static void __init *spidev_platform_data(void *info)
 {
 	struct spi_board_info *spi_info = info;
 
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return ERR_PTR(-ENODEV);
+
 	spi_info->mode = SPI_MODE_0;
 	spi_info->controller_data = &spidev_spi_chip;
 

From 74545f63890e38520eb4d1dbedcadaa9c0dbc824 Mon Sep 17 00:00:00 2001
From: David Carrillo-Cisneros <davidcc@google.com>
Date: Thu, 22 Dec 2016 17:17:40 -0800
Subject: [PATCH 064/258] perf/x86: Set pmu->module in Intel PMU modules

The conversion of Intel PMU drivers into modules did not include reference
counting. The machine will crash when attempting to  access deleted code
if an event from a module PMU is started and the module removed before the
event is destroyed.

i.e. this crashes the machine:

	$ insmod intel-rapl-perf.ko
	$ perf stat -e power/energy-cores/ -C 0 &
	$ rmmod intel-rapl-perf.ko

Set THIS_MODULE to pmu->module in Intel module PMUs so that generic code
can handle reference counting and deny rmmod while an event still exists.

Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1482455860-116269-1-git-send-email-davidcc@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/cstate.c | 2 ++
 arch/x86/events/intel/rapl.c   | 1 +
 arch/x86/events/intel/uncore.c | 1 +
 3 files changed, 4 insertions(+)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index fec8a461bdef..1076c9a77292 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static struct pmu cstate_pkg_pmu = {
@@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static const struct cstate_model nhm_cstates __initconst = {
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index bd34124449b0..17c3564d087a 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void)
 	rapl_pmus->pmu.start		= rapl_pmu_event_start;
 	rapl_pmus->pmu.stop		= rapl_pmu_event_stop;
 	rapl_pmus->pmu.read		= rapl_pmu_event_read;
+	rapl_pmus->pmu.module		= THIS_MODULE;
 	return 0;
 }
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 97c246f84dea..8c4ccdc3a3f3 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
 			.start		= uncore_pmu_event_start,
 			.stop		= uncore_pmu_event_stop,
 			.read		= uncore_pmu_event_read,
+			.module		= THIS_MODULE,
 		};
 	} else {
 		pmu->pmu = *pmu->type->pmu;

From 60448b077ed93d227e6c117a9e87db76ff0c1911 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Wed, 4 Jan 2017 15:44:52 -0600
Subject: [PATCH 065/258] ASoC: Intel: bytcr-rt5640: fix settings in internal
 clock mode

Frequency value of zero did not make sense, use same 24.576MHz
setting and only change the clock source in idle mode

Suggested-by: Bard Liao <bardliao@realtek.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index e33e4777a65c..8d2fb2d6f532 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -142,7 +142,7 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w,
 		 * for Jack detection and button press
 		 */
 		ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_RCCLK,
-					     0,
+					     48000 * 512,
 					     SND_SOC_CLOCK_IN);
 		if (!ret) {
 			if ((byt_rt5640_quirk & BYT_RT5640_MCLK_EN) && priv->mclk)

From 08f9572671c8047e7234cbf150869aa3c3d59a97 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 5 Jan 2017 14:25:59 +0100
Subject: [PATCH 066/258] HID: ignore Petzl USB headlamp

This headlamp contains a dummy HID descriptor which pretends to be
a mouse-like device, but can't be used as a mouse at all.

Reported-by: Lukas Ocilka <lukas.ocilka@suse.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 1 +
 drivers/hid/hid-ids.h  | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index cff060b56da9..ea36b557d5ee 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2496,6 +2496,7 @@ static const struct hid_device_id hid_ignore_list[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PETZL, USB_DEVICE_ID_PETZL_HEADLAMP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PHILIPS, USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) },
 #if IS_ENABLED(CONFIG_MOUSE_SYNAPTICS_USB)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 54bd22dc1411..f46f2c5117fa 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -816,6 +816,9 @@
 #define USB_VENDOR_ID_PETALYNX		0x18b1
 #define USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE	0x0037
 
+#define USB_VENDOR_ID_PETZL		0x2122
+#define USB_DEVICE_ID_PETZL_HEADLAMP	0x1234
+
 #define USB_VENDOR_ID_PHILIPS		0x0471
 #define USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE 0x0617
 

From 28ca833ecf89c585a9543fb21aef6b2bdbbaa48a Mon Sep 17 00:00:00 2001
From: JackieLiu <liuyun01@kylinos.cn>
Date: Tue, 13 Dec 2016 13:55:27 +0800
Subject: [PATCH 067/258] md/raid5-cache: removes unnecessary write-through
 mode judgments

The write-through mode has been returned in front of the function,
do not need to do it again.

Signed-off-by: JackieLiu <liuyun01@kylinos.cn>
Reviewed-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index d7bfb6fc8aef..49aea4231084 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2418,9 +2418,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	if (do_wakeup)
 		wake_up(&conf->wait_for_overlap);
 
-	if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
-		return;
-
 	spin_lock_irq(&conf->log->stripe_in_journal_lock);
 	list_del_init(&sh->r5c);
 	spin_unlock_irq(&conf->log->stripe_in_journal_lock);

From 3c66abbaaf69671dfd3eb9fa7740b5d7ec688231 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 14 Dec 2016 15:38:01 -0800
Subject: [PATCH 068/258] md/r5cache: simplify handling of sh->log_start in
 recovery

We only need to update sh->log_start at the end of recovery,
which is r5c_recovery_rewrite_data_only_stripes(), so it is not
necessary to set it before that. In this patch, log_start is
removed from r5c_recovery_alloc_stripe().

After updating all sh->log_start, rewrite_data_only_stripes()
also updates log->next_checkpoints to the last sh->log_start.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 49aea4231084..b178a8fef266 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1682,8 +1682,7 @@ out:
 
 static struct stripe_head *
 r5c_recovery_alloc_stripe(struct r5conf *conf,
-			  sector_t stripe_sect,
-			  sector_t log_start)
+			  sector_t stripe_sect)
 {
 	struct stripe_head *sh;
 
@@ -1692,7 +1691,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf,
 		return NULL;  /* no more stripe available */
 
 	r5l_recovery_reset_stripe(sh);
-	sh->log_start = log_start;
 
 	return sh;
 }
@@ -1862,7 +1860,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 						stripe_sect);
 
 		if (!sh) {
-			sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos);
+			sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
 			/*
 			 * cannot get stripe from raid5_get_active_stripe
 			 * try replay some stripes
@@ -1871,7 +1869,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 				r5c_recovery_replay_stripes(
 					cached_stripe_list, ctx);
 				sh = r5c_recovery_alloc_stripe(
-					conf, stripe_sect, ctx->pos);
+					conf, stripe_sect);
 			}
 			if (!sh) {
 				pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
@@ -1879,8 +1877,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 					conf->min_nr_stripes * 2);
 				raid5_set_cache_size(mddev,
 						     conf->min_nr_stripes * 2);
-				sh = r5c_recovery_alloc_stripe(
-					conf, stripe_sect, ctx->pos);
+				sh = r5c_recovery_alloc_stripe(conf,
+							       stripe_sect);
 			}
 			if (!sh) {
 				pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
@@ -1894,7 +1892,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 			if (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
 			    test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) {
 				r5l_recovery_replay_one_stripe(conf, sh, ctx);
-				sh->log_start = ctx->pos;
 				list_move_tail(&sh->lru, cached_stripe_list);
 			}
 			r5l_recovery_load_data(log, sh, ctx, payload,
@@ -1933,8 +1930,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log,
 			set_bit(R5_UPTODATE, &dev->flags);
 		}
 	}
-	list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
-	atomic_inc(&log->stripe_in_journal_count);
 }
 
 /*
@@ -2070,6 +2065,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 	struct stripe_head *sh, *next;
 	struct mddev *mddev = log->rdev->mddev;
 	struct page *page;
+	sector_t next_checkpoint = MaxSector;
 
 	page = alloc_page(GFP_KERNEL);
 	if (!page) {
@@ -2078,6 +2074,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		return -ENOMEM;
 	}
 
+	WARN_ON(list_empty(&ctx->cached_list));
+
 	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
 		struct r5l_meta_block *mb;
 		int i;
@@ -2123,12 +2121,15 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
 			     REQ_OP_WRITE, REQ_FUA, false);
 		sh->log_start = ctx->pos;
+		list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
+		atomic_inc(&log->stripe_in_journal_count);
 		ctx->pos = write_pos;
 		ctx->seq += 1;
-
+		next_checkpoint = sh->log_start;
 		list_del_init(&sh->lru);
 		raid5_release_stripe(sh);
 	}
+	log->next_checkpoint = next_checkpoint;
 	__free_page(page);
 	return 0;
 }
@@ -2139,7 +2140,6 @@ static int r5l_recovery_log(struct r5l_log *log)
 	struct r5l_recovery_ctx ctx;
 	int ret;
 	sector_t pos;
-	struct stripe_head *sh;
 
 	ctx.pos = log->last_checkpoint;
 	ctx.seq = log->last_cp_seq;
@@ -2164,9 +2164,6 @@ static int r5l_recovery_log(struct r5l_log *log)
 		log->next_checkpoint = ctx.pos;
 		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
 		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-	} else {
-		sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
-		log->next_checkpoint = sh->log_start;
 	}
 
 	if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))

From d2250f105f18a43fdab17421bd80b0ffc9fcc53f Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 14 Dec 2016 15:38:02 -0800
Subject: [PATCH 069/258] md/r5cache: assign conf->log before r5l_load_log()

r5l_load_log() calls functions that requires a proper conf->log,
for example, r5c_is_writeback(). Therefore, we should set
conf->log before calling r5l_load_log(). If r5l_load_log() fails,
conf->log is set back to NULL.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index b178a8fef266..bff1b4a949e8 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2633,14 +2633,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->stripe_in_journal_lock);
 	atomic_set(&log->stripe_in_journal_count, 0);
 
+	rcu_assign_pointer(conf->log, log);
+
 	if (r5l_load_log(log))
 		goto error;
 
-	rcu_assign_pointer(conf->log, log);
 	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
 	return 0;
 
 error:
+	rcu_assign_pointer(conf->log, NULL);
 	md_unregister_thread(&log->reclaim_thread);
 reclaim_thread:
 	mempool_destroy(log->meta_pool);

From 99f17890f04cff0262de7393c60a2f6d9c9c7e71 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Dec 2016 00:52:30 +0000
Subject: [PATCH 070/258] md/r5cache: fix spelling mistake on "recoverying"

Trivial fix to spelling mistake "recoverying" to "recovering" in
pr_dbg message.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index bff1b4a949e8..0e8ed2c327b0 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2170,7 +2170,7 @@ static int r5l_recovery_log(struct r5l_log *log)
 		pr_debug("md/raid:%s: starting from clean shutdown\n",
 			 mdname(mddev));
 	else {
-		pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
+		pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
 			 mdname(mddev), ctx.data_only_stripes,
 			 ctx.data_parity_stripes);
 

From 394ed8e4743b0cfc5496fe49059fbfc2bc8eae35 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 4 Jan 2017 16:10:19 -0800
Subject: [PATCH 071/258] md: cleanup mddev flag clear for takeover

Commit 6995f0b (md: takeover should clear unrelated bits) clear
unrelated bits, but it's quite fragile. To avoid error in the future,
define a macro for unsupported mddev flags for each raid type and use it
to clear unsupported mddev flags. This should be less error-prone.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.h    |  8 ++++++++
 drivers/md/raid0.c | 12 ++++++++----
 drivers/md/raid1.c |  8 ++++++--
 drivers/md/raid5.c |  5 ++++-
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/md/md.h b/drivers/md/md.h
index e38936d05df1..2a514036a83d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 				int is_new);
 struct md_cluster_info;
 
+/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
 enum mddev_flags {
 	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
 	MD_CLOSING,		/* If set, we are closing the array, do not open
@@ -702,4 +703,11 @@ static inline int mddev_is_clustered(struct mddev *mddev)
 {
 	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
 }
+
+/* clear unsupported mddev_flags */
+static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
+	unsigned long unsupported_flags)
+{
+	mddev->flags &= ~unsupported_flags;
+}
 #endif /* _MD_MD_H */
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a162fedeb51a..848365d474f3 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -26,6 +26,11 @@
 #include "raid0.h"
 #include "raid5.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS		\
+	((1L << MD_HAS_JOURNAL) |	\
+	 (1L << MD_JOURNAL_CLEAN) |	\
+	 (1L << MD_FAILFAST_SUPPORTED))
+
 static int raid0_congested(struct mddev *mddev, int bits)
 {
 	struct r0conf *conf = mddev->private;
@@ -539,8 +544,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
 	mddev->delta_disks = -1;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-	clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 
@@ -583,7 +587,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
 	mddev->degraded = 0;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 	return priv_conf;
@@ -626,7 +630,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
 	mddev->raid_disks = 1;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 	return priv_conf;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 14422407e520..7b0f647bcccb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -42,6 +42,10 @@
 #include "raid1.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS		\
+	((1L << MD_HAS_JOURNAL) |	\
+	 (1L << MD_JOURNAL_CLEAN))
+
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
  */
@@ -3257,8 +3261,8 @@ static void *raid1_takeover(struct mddev *mddev)
 		if (!IS_ERR(conf)) {
 			/* Array must appear to be quiesced */
 			conf->array_frozen = 1;
-			clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-			clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+			mddev_clear_unsupported_flags(mddev,
+				UNSUPPORTED_MDDEV_FLAGS);
 		}
 		return conf;
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 06d7279bdd04..7b1da6e95a56 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -62,6 +62,8 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS	(1L << MD_FAILFAST_SUPPORTED)
+
 #define cpu_to_group(cpu) cpu_to_node(cpu)
 #define ANY_GROUP NUMA_NO_NODE
 
@@ -7830,7 +7832,8 @@ static void *raid5_takeover_raid1(struct mddev *mddev)
 
 	ret = setup_conf(mddev);
 	if (!IS_ERR_VALUE(ret))
-		clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+		mddev_clear_unsupported_flags(mddev,
+			UNSUPPORTED_MDDEV_FLAGS);
 	return ret;
 }
 

From 1c3415a06b1016a596bfe59e0cfee56c773aa958 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 5 Jan 2017 14:14:54 -0800
Subject: [PATCH 072/258] Input: elants_i2c - avoid divide by 0 errors on bad
 touchscreen data

The following crash may be seen if bad data is received from the
touchscreen.

[ 2189.425150] elants_i2c i2c-ELAN0001:00: unknown packet ff ff ff ff
[ 2189.430738] divide error: 0000 [#1] PREEMPT SMP
[ 2189.434679] gsmi: Log Shutdown Reason 0x03
[ 2189.434689] Modules linked in: ip6t_REJECT nf_reject_ipv6 rfcomm evdi
uinput uvcvideo cmac videobuf2_vmalloc videobuf2_memops snd_hda_codec_hdmi
i2c_dev videobuf2_core snd_soc_sst_cht_bsw_rt5645 snd_hda_intel
snd_intel_sst_acpi btusb btrtl btbcm btintel bluetooth snd_soc_sst_acpi
snd_hda_codec snd_intel_sst_core snd_hwdep snd_soc_sst_mfld_platform
snd_hda_core snd_soc_rt5645 memconsole_x86_legacy memconsole zram snd_soc_rl6231
fuse ip6table_filter iwlmvm iwlwifi iwl7000_mac80211 cfg80211 iio_trig_sysfs
joydev cros_ec_sensors cros_ec_sensors_core industrialio_triggered_buffer
kfifo_buf industrialio snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq
snd_seq_device ppp_async ppp_generic slhc tun
[ 2189.434866] CPU: 0 PID: 106 Comm: irq/184-ELAN000 Tainted: G        W
3.18.0-13101-g57e8190 #1
[ 2189.434883] Hardware name: GOOGLE Ultima, BIOS Google_Ultima.7287.131.43 07/20/2016
[ 2189.434898] task: ffff88017a0b6d80 ti: ffff88017a2bc000 task.ti: ffff88017a2bc000
[ 2189.434913] RIP: 0010:[<ffffffffbecc48d5>]  [<ffffffffbecc48d5>] elants_i2c_irq+0x190/0x200
[ 2189.434937] RSP: 0018:ffff88017a2bfd98  EFLAGS: 00010293
[ 2189.434948] RAX: 0000000000000000 RBX: ffff88017a967828 RCX: ffff88017a9678e8
[ 2189.434962] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000000
[ 2189.434975] RBP: ffff88017a2bfdd8 R08: 00000000000003e8 R09: 0000000000000000
[ 2189.434989] R10: 0000000000000000 R11: 000000000044a2bd R12: ffff88017a991800
[ 2189.435001] R13: ffffffffbe8a2a53 R14: ffff88017a0b6d80 R15: ffff88017a0b6d80
[ 2189.435011] FS:  0000000000000000(0000) GS:ffff88017fc00000(0000) knlGS:0000000000000000
[ 2189.435022] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 2189.435030] CR2: 00007f678d94b000 CR3: 000000003f41a000 CR4: 00000000001007f0
[ 2189.435039] Stack:
[ 2189.435044]  ffff88017a2bfda8 ffff88017a9678e8 646464647a2bfdd8 0000000006e09574
[ 2189.435060]  0000000000000000 ffff88017a088b80 ffff88017a921000 ffffffffbe8a2a53
[ 2189.435074]  ffff88017a2bfe08 ffffffffbe8a2a73 ffff88017a0b6d80 0000000006e09574
[ 2189.435089] Call Trace:
[ 2189.435101]  [<ffffffffbe8a2a53>] ? irq_thread_dtor+0xa9/0xa9
[ 2189.435112]  [<ffffffffbe8a2a73>] irq_thread_fn+0x20/0x40
[ 2189.435123]  [<ffffffffbe8a2be1>] irq_thread+0x14e/0x222
[ 2189.435135]  [<ffffffffbee8cbeb>] ? __schedule+0x3b3/0x57a
[ 2189.435145]  [<ffffffffbe8a29aa>] ? wake_threads_waitq+0x2d/0x2d
[ 2189.435156]  [<ffffffffbe8a2a93>] ? irq_thread_fn+0x40/0x40
[ 2189.435168]  [<ffffffffbe87c385>] kthread+0x10e/0x116
[ 2189.435178]  [<ffffffffbe87c277>] ? __kthread_parkme+0x67/0x67
[ 2189.435189]  [<ffffffffbee900ac>] ret_from_fork+0x7c/0xb0
[ 2189.435199]  [<ffffffffbe87c277>] ? __kthread_parkme+0x67/0x67
[ 2189.435208] Code: ff ff eb 73 0f b6 bb c1 00 00 00 83 ff 03 7e 13 49 8d 7c
24 20 ba 04 00 00 00 48 c7 c6 8a cd 21 bf eb 4d 0f b6 83 c2 00 00 00 99 <f7> ff
83 f8 37 75 15 48 6b f7 37 4c 8d a3 c4 00 00 00 4c 8d ac
[ 2189.435312] RIP  [<ffffffffbecc48d5>] elants_i2c_irq+0x190/0x200
[ 2189.435323]  RSP <ffff88017a2bfd98>
[ 2189.435350] ---[ end trace f4945345a75d96dd ]---
[ 2189.443841] Kernel panic - not syncing: Fatal exception
[ 2189.444307] Kernel Offset: 0x3d800000 from 0xffffffff81000000
	(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 2189.444519] gsmi: Log Shutdown Reason 0x02

The problem was seen with a 3.18 based kernel, but there is no reason
to believe that the upstream code is safe.

Fixes: 66aee90088da2 ("Input: add support for Elan eKTH I2C touchscreens")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/elants_i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index 02aec284deca..3e6003d32e56 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -914,9 +914,9 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev)
 
 		case QUEUE_HEADER_NORMAL:
 			report_count = ts->buf[FW_HDR_COUNT];
-			if (report_count > 3) {
+			if (report_count == 0 || report_count > 3) {
 				dev_err(&client->dev,
-					"too large report count: %*ph\n",
+					"bad report count: %*ph\n",
 					HEADER_SIZE, ts->buf);
 				break;
 			}

From 9698b6f473555a722bf81a3371998427d5d27bde Mon Sep 17 00:00:00 2001
From: Satish Kharat <satishkh@cisco.com>
Date: Wed, 14 Dec 2016 13:20:41 -0800
Subject: [PATCH 073/258] scsi: fnic: Avoid sending reset to firmware when
 another reset is in progress

This fix is to avoid calling fnic_fw_reset_handler through
fnic_host_reset when a finc reset is alreay in progress.

Signed-off-by: Satish Kharat <satishkh@cisco.com>
Signed-off-by: Sesidhar Baddela <sebaddel@cisco.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/fnic/fnic.h      |  1 +
 drivers/scsi/fnic/fnic_scsi.c | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 9ddc9200e0a4..9e4b7709043e 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -248,6 +248,7 @@ struct fnic {
 	struct completion *remove_wait; /* device remove thread blocks */
 
 	atomic_t in_flight;		/* io counter */
+	bool internal_reset_inprogress;
 	u32 _reserved;			/* fill hole */
 	unsigned long state_flags;	/* protected by host lock */
 	enum fnic_state state;
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 2544a37ece0a..adb3d5871e74 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2581,6 +2581,19 @@ int fnic_host_reset(struct scsi_cmnd *sc)
 	unsigned long wait_host_tmo;
 	struct Scsi_Host *shost = sc->device->host;
 	struct fc_lport *lp = shost_priv(shost);
+	struct fnic *fnic = lport_priv(lp);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	if (fnic->internal_reset_inprogress == 0) {
+		fnic->internal_reset_inprogress = 1;
+	} else {
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			"host reset in progress skipping another host reset\n");
+		return SUCCESS;
+	}
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 
 	/*
 	 * If fnic_reset is successful, wait for fabric login to complete
@@ -2601,6 +2614,9 @@ int fnic_host_reset(struct scsi_cmnd *sc)
 		}
 	}
 
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	fnic->internal_reset_inprogress = 0;
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 	return ret;
 }
 

From 0371adcdaca92912baaa3256ed13e058a016e62d Mon Sep 17 00:00:00 2001
From: Burak Ok <burak-kernel@bur0k.de>
Date: Wed, 21 Dec 2016 14:45:53 +0100
Subject: [PATCH 074/258] scsi: snic: Return error code on memory allocation
 failure

If a call to mempool_create_slab_pool() in snic_probe() returns NULL,
return -ENOMEM to indicate failure. mempool_creat_slab_pool() only fails
if it cannot allocate memory.

https://bugzilla.kernel.org/show_bug.cgi?id=189061

Reported-by: bianpan2010@ruc.edu.cn
Signed-off-by: Burak Ok <burak-kernel@bur0k.de>
Signed-off-by: Andreas Schaertl <andreas.schaertl@fau.de>
Acked-by: Narsimhulu Musini <nmusini@cisco.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/snic/snic_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c
index 396b32dca074..7cf70aaec0ba 100644
--- a/drivers/scsi/snic/snic_main.c
+++ b/drivers/scsi/snic/snic_main.c
@@ -591,6 +591,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!pool) {
 		SNIC_HOST_ERR(shost, "dflt sgl pool creation failed\n");
 
+		ret = -ENOMEM;
 		goto err_free_res;
 	}
 
@@ -601,6 +602,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!pool) {
 		SNIC_HOST_ERR(shost, "max sgl pool creation failed\n");
 
+		ret = -ENOMEM;
 		goto err_free_dflt_sgl_pool;
 	}
 
@@ -611,6 +613,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!pool) {
 		SNIC_HOST_ERR(shost, "snic tmreq info pool creation failed.\n");
 
+		ret = -ENOMEM;
 		goto err_free_max_sgl_pool;
 	}
 

From 2d1148f0f45079d25a0fa0d67e4fdb2a656d12fb Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Fri, 23 Dec 2016 20:40:19 -0800
Subject: [PATCH 075/258] scsi: bfa: Increase requested firmware version to
 3.2.5.1

bna & bfa firmware version 3.2.5.1 was submitted to linux-firmware on
Feb 17 19:10:20 2015 -0500 in 0ab54ff1dc ("linux-firmware: Add QLogic BR
Series Adapter Firmware").

bna was updated to use the newer firmware on Feb 19 16:02:32 2015 -0500 in
3f307c3d70 ("bna: Update the Driver and Firmware Version")

bfa was not updated. I presume this was an oversight but it broke support
for bfa+bna cards such as the following
	04:00.0 Fibre Channel [0c04]: Brocade Communications Systems, Inc.
		1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)
	04:00.1 Fibre Channel [0c04]: Brocade Communications Systems, Inc.
		1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)
	04:00.2 Ethernet controller [0200]: Brocade Communications Systems,
		Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)
	04:00.3 Ethernet controller [0200]: Brocade Communications Systems,
		Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)

Currently, if the bfa module is loaded first, bna fails to probe the
respective devices with
[  215.026787] bna: QLogic BR-series 10G Ethernet driver - version: 3.2.25.1
[  215.043707] bna 0000:04:00.2: bar0 mapped to ffffc90001fc0000, len 262144
[  215.060656] bna 0000:04:00.2: initialization failed err=1
[  215.073893] bna 0000:04:00.3: bar0 mapped to ffffc90002040000, len 262144
[  215.090644] bna 0000:04:00.3: initialization failed err=1

Whereas if bna is loaded first, bfa fails with
[  249.592109] QLogic BR-series BFA FC/FCOE SCSI driver - version: 3.2.25.0
[  249.610738] bfa 0000:04:00.0: Running firmware version is incompatible with the driver version
[  249.833513] bfa 0000:04:00.0: bfa init failed
[  249.833919] scsi host6: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.0 driver: 3.2.25.0
[  249.841446] bfa 0000:04:00.1: Running firmware version is incompatible with the driver version
[  250.045449] bfa 0000:04:00.1: bfa init failed
[  250.045962] scsi host7: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.1 driver: 3.2.25.0

Increase bfa's requested firmware version. Also increase the driver
version.  I only tested that all of the devices probe without error.

Reported-by: Tim Ehlers <tehlers@gwdg.de>
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Rasesh Mody <rasesh.mody@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bfa/bfad.c     | 6 +++---
 drivers/scsi/bfa/bfad_drv.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 9d253cb83ee7..e70410beb83a 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -64,9 +64,9 @@ int		max_rport_logins = BFA_FCS_MAX_RPORT_LOGINS;
 u32	bfi_image_cb_size, bfi_image_ct_size, bfi_image_ct2_size;
 u32	*bfi_image_cb, *bfi_image_ct, *bfi_image_ct2;
 
-#define BFAD_FW_FILE_CB		"cbfw-3.2.3.0.bin"
-#define BFAD_FW_FILE_CT		"ctfw-3.2.3.0.bin"
-#define BFAD_FW_FILE_CT2	"ct2fw-3.2.3.0.bin"
+#define BFAD_FW_FILE_CB		"cbfw-3.2.5.1.bin"
+#define BFAD_FW_FILE_CT		"ctfw-3.2.5.1.bin"
+#define BFAD_FW_FILE_CT2	"ct2fw-3.2.5.1.bin"
 
 static u32 *bfad_load_fwimg(struct pci_dev *pdev);
 static void bfad_free_fwimg(void);
diff --git a/drivers/scsi/bfa/bfad_drv.h b/drivers/scsi/bfa/bfad_drv.h
index f9e862093a25..cfcfff48e8e1 100644
--- a/drivers/scsi/bfa/bfad_drv.h
+++ b/drivers/scsi/bfa/bfad_drv.h
@@ -58,7 +58,7 @@
 #ifdef BFA_DRIVER_VERSION
 #define BFAD_DRIVER_VERSION    BFA_DRIVER_VERSION
 #else
-#define BFAD_DRIVER_VERSION    "3.2.25.0"
+#define BFAD_DRIVER_VERSION    "3.2.25.1"
 #endif
 
 #define BFAD_PROTO_NAME FCPI_NAME

From a33d331761bc5dd330499ca5ceceb67f0640a8e6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 5 Jan 2017 10:26:38 +0100
Subject: [PATCH 076/258] x86/CPU/AMD: Fix Bulldozer topology

The following commit:

  8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id")

... broke the initial strategy for Bulldozer-based cores' topology,
where we consider each thread of a compute unit a standalone core
and not a HT or SMT thread.

Revert to the firmware-supplied core_id numbering and do not make
them thread siblings as we don't consider them for such even if they
technically are, more or less.

Reported-and-tested-by: Brice Goglin <Brice.Goglin@inria.fr>
Tested-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # v4.6+
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id")
Link: http://lkml.kernel.org/r/20170105092638.5247-1-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 71cae73a5076..1d3167269a67 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,15 +309,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		u32 eax, ebx, ecx, edx;
 
-		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-		node_id = ecx & 7;
-
-		/* get compute unit information */
-		smp_num_siblings = ((ebx >> 8) & 3) + 1;
-		c->x86_max_cores /= smp_num_siblings;
-		c->cpu_core_id = ebx & 0xff;
+		node_id = cpuid_ecx(0x8000001e) & 7;
 
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we

From 1ebb71143758f45dc0fa76e2f48429e13b16d110 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 6 Jan 2017 15:33:36 +0100
Subject: [PATCH 077/258] HID: hid-cypress: validate length of report

Make sure we have enough of a report structure to validate before
looking at it.

Reported-by: Benoit Camredon <benoit.camredon@airbus.com>
Tested-by: Benoit Camredon <benoit.camredon@airbus.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-cypress.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c
index 1b764d1745f3..1689568b597d 100644
--- a/drivers/hid/hid-cypress.c
+++ b/drivers/hid/hid-cypress.c
@@ -39,6 +39,9 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 	if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX))
 		return rdesc;
 
+	if (*rsize < 4)
+		return rdesc;
+
 	for (i = 0; i < *rsize - 4; i++)
 		if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) {
 			rdesc[i] = 0x19;

From bc65a326c579e93a5c2120a65ede72f11369ee5a Mon Sep 17 00:00:00 2001
From: Jeeja KP <jeeja.kp@intel.com>
Date: Mon, 2 Jan 2017 09:50:05 +0530
Subject: [PATCH 078/258] ASoC: Intel: Skylake: Release FW ctx in cleanup

Saved firmware ctx was not never released, so release Firmware
ctx in cleanup routine.

Signed-off-by: Jeeja KP <jeeja.kp@intel.com>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/skylake/skl-sst.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c
index 8fc3178bc79c..b30bd384c8d3 100644
--- a/sound/soc/intel/skylake/skl-sst.c
+++ b/sound/soc/intel/skylake/skl-sst.c
@@ -515,6 +515,9 @@ EXPORT_SYMBOL_GPL(skl_sst_init_fw);
 
 void skl_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx)
 {
+
+	if (ctx->dsp->fw)
+		release_firmware(ctx->dsp->fw);
 	skl_clear_module_table(ctx->dsp);
 	skl_freeup_uuid_list(ctx);
 	skl_ipc_free(&ctx->ipc);

From 9f169b9f52a4afccdab7a7d2311b0c53a78a1e6b Mon Sep 17 00:00:00 2001
From: Patrick Lai <plai@codeaurora.org>
Date: Sat, 31 Dec 2016 22:44:39 -0800
Subject: [PATCH 079/258] ASoC: dpcm: Avoid putting stream state to STOP when
 FE stream is paused

When multiple front-ends are using the same back-end, putting state of a
front-end to STOP state upon receiving pause command will result in backend
stream getting released by DPCM framework unintentionally. In order to
avoid backend to be released when another active front-end stream is
present, put the stream state to PAUSED state instead of STOP state.

Signed-off-by: Patrick Lai <plai@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-pcm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index e7a1eaa2772f..6aba14009c92 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2184,9 +2184,11 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd)
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
-	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 		fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP;
 		break;
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED;
+		break;
 	}
 
 out:

From 20b1e22d01a4b0b11d3a1066e9feb04be38607ec Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nicstange@gmail.com>
Date: Thu, 5 Jan 2017 13:51:29 +0100
Subject: [PATCH 080/258] x86/efi: Don't allocate memmap through memblock after
 mm_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the following commit:

  4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data")

...  efi_bgrt_init() calls into the memblock allocator through
efi_mem_reserve() => efi_arch_mem_reserve() *after* mm_init() has been called.

Indeed, KASAN reports a bad read access later on in efi_free_boot_services():

  BUG: KASAN: use-after-free in efi_free_boot_services+0xae/0x24c
            at addr ffff88022de12740
  Read of size 4 by task swapper/0/0
  page:ffffea0008b78480 count:0 mapcount:-127
  mapping:          (null) index:0x1 flags: 0x5fff8000000000()
  [...]
  Call Trace:
   dump_stack+0x68/0x9f
   kasan_report_error+0x4c8/0x500
   kasan_report+0x58/0x60
   __asan_load4+0x61/0x80
   efi_free_boot_services+0xae/0x24c
   start_kernel+0x527/0x562
   x86_64_start_reservations+0x24/0x26
   x86_64_start_kernel+0x157/0x17a
   start_cpu+0x5/0x14

The instruction at the given address is the first read from the memmap's
memory, i.e. the read of md->type in efi_free_boot_services().

Note that the writes earlier in efi_arch_mem_reserve() don't splat because
they're done through early_memremap()ed addresses.

So, after memblock is gone, allocations should be done through the "normal"
page allocator. Introduce a helper, efi_memmap_alloc() for this. Use
it from efi_arch_mem_reserve(), efi_free_boot_services() and, for the sake
of consistency, from efi_fake_memmap() as well.

Note that for the latter, the memmap allocations cease to be page aligned.
This isn't needed though.

Tested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Nicolai Stange <nicstange@gmail.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.9
Cc: Dave Young <dyoung@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mika Penttilä <mika.penttila@nextfour.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data")
Link: http://lkml.kernel.org/r/20170105125130.2815-1-nicstange@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/quirks.c  |  4 ++--
 drivers/firmware/efi/fake_mem.c |  3 +--
 drivers/firmware/efi/memmap.c   | 38 +++++++++++++++++++++++++++++++++
 include/linux/efi.h             |  1 +
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 10aca63a50d7..30031d5293c4 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -214,7 +214,7 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
 
 	new_size = efi.memmap.desc_size * num_entries;
 
-	new_phys = memblock_alloc(new_size, 0);
+	new_phys = efi_memmap_alloc(num_entries);
 	if (!new_phys) {
 		pr_err("Could not allocate boot services memmap\n");
 		return;
@@ -355,7 +355,7 @@ void __init efi_free_boot_services(void)
 	}
 
 	new_size = efi.memmap.desc_size * num_entries;
-	new_phys = memblock_alloc(new_size, 0);
+	new_phys = efi_memmap_alloc(num_entries);
 	if (!new_phys) {
 		pr_err("Failed to allocate new EFI memmap\n");
 		return;
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index 520a40e5e0e4..6c7d60c239b5 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -71,8 +71,7 @@ void __init efi_fake_memmap(void)
 	}
 
 	/* allocate memory for new EFI memmap */
-	new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map,
-					PAGE_SIZE);
+	new_memmap_phy = efi_memmap_alloc(new_nr_map);
 	if (!new_memmap_phy)
 		return;
 
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index f03ddecd232b..78686443cb37 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -9,6 +9,44 @@
 #include <linux/efi.h>
 #include <linux/io.h>
 #include <asm/early_ioremap.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+
+static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size)
+{
+	return memblock_alloc(size, 0);
+}
+
+static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
+{
+	unsigned int order = get_order(size);
+	struct page *p = alloc_pages(GFP_KERNEL, order);
+
+	if (!p)
+		return 0;
+
+	return PFN_PHYS(page_to_pfn(p));
+}
+
+/**
+ * efi_memmap_alloc - Allocate memory for the EFI memory map
+ * @num_entries: Number of entries in the allocated map.
+ *
+ * Depending on whether mm_init() has already been invoked or not,
+ * either memblock or "normal" page allocation is used.
+ *
+ * Returns the physical address of the allocated memory map on
+ * success, zero on failure.
+ */
+phys_addr_t __init efi_memmap_alloc(unsigned int num_entries)
+{
+	unsigned long size = num_entries * efi.memmap.desc_size;
+
+	if (slab_is_available())
+		return __efi_memmap_alloc_late(size);
+
+	return __efi_memmap_alloc_early(size);
+}
 
 /**
  * __efi_memmap_init - Common code for mapping the EFI memory map
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a07a476178cd..0c5420208c40 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -950,6 +950,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes,
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 
+extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries);
 extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
 extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
 extern void __init efi_memmap_unmap(void);

From eeb0d56fab4cd7848cf2be6704fa48900dbc1381 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 14 Dec 2016 16:47:43 +0100
Subject: [PATCH 081/258] mac80211: implement multicast forwarding on fast-RX
 path

In AP (or VLAN) mode, when unicast 802.11 packets are received,
they might actually be multicast after conversion. In this case
the fast-RX path didn't handle them properly to send them back
to the wireless medium. Implement that by copying the SKB and
sending it back out.

The possible alternative would be to just punt the packet back
to the regular (slow) RX path, but since we have almost all of
the required code here already it's not so complicated to add
here. Punting it back would also mean acquiring the spinlock,
which would be bad for the stated purpose of the fast-RX path,
to enable well-performing parallel RX.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3e289a64ed43..c037c5bb6167 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3942,21 +3942,31 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	u64_stats_update_end(&stats->syncp);
 
 	if (fast_rx->internal_forward) {
-		struct sta_info *dsta = sta_info_get(rx->sdata, skb->data);
+		struct sk_buff *xmit_skb = NULL;
+		bool multicast = is_multicast_ether_addr(skb->data);
 
-		if (dsta) {
+		if (multicast) {
+			xmit_skb = skb_copy(skb, GFP_ATOMIC);
+		} else if (sta_info_get(rx->sdata, skb->data)) {
+			xmit_skb = skb;
+			skb = NULL;
+		}
+
+		if (xmit_skb) {
 			/*
 			 * Send to wireless media and increase priority by 256
 			 * to keep the received priority instead of
 			 * reclassifying the frame (see cfg80211_classify8021d).
 			 */
-			skb->priority += 256;
-			skb->protocol = htons(ETH_P_802_3);
-			skb_reset_network_header(skb);
-			skb_reset_mac_header(skb);
-			dev_queue_xmit(skb);
-			return true;
+			xmit_skb->priority += 256;
+			xmit_skb->protocol = htons(ETH_P_802_3);
+			skb_reset_network_header(xmit_skb);
+			skb_reset_mac_header(xmit_skb);
+			dev_queue_xmit(xmit_skb);
 		}
+
+		if (!skb)
+			return true;
 	}
 
 	/* deliver to local stack */

From ac0c7cf8be00f269f82964cf7b144ca3edc5dbc4 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 6 Jan 2017 14:12:51 +0100
Subject: [PATCH 082/258] btrfs: fix crash when tracepoint arguments are freed
 by wq callbacks

Enabling btrfs tracepoints leads to instant crash, as reported. The wq
callbacks could free the memory and the tracepoints started to
dereference the members to get to fs_info.

The proposed fix https://marc.info/?l=linux-btrfs&m=148172436722606&w=2
removed the tracepoints but we could preserve them by passing only the
required data in a safe way.

Fixes: bc074524e123 ("btrfs: prefix fsid to all trace events")
CC: stable@vger.kernel.org # 4.8+
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/async-thread.c      | 15 +++++++++++----
 include/trace/events/btrfs.h | 22 +++++++++++++---------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 63d197724519..ff0b0be92d61 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -273,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 	unsigned long flags;
 
 	while (1) {
+		void *wtag;
+
 		spin_lock_irqsave(lock, flags);
 		if (list_empty(list))
 			break;
@@ -299,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 		spin_unlock_irqrestore(lock, flags);
 
 		/*
-		 * we don't want to call the ordered free functions
-		 * with the lock held though
+		 * We don't want to call the ordered free functions with the
+		 * lock held though. Save the work as tag for the trace event,
+		 * because the callback could free the structure.
 		 */
+		wtag = work;
 		work->ordered_free(work);
-		trace_btrfs_all_work_done(work);
+		trace_btrfs_all_work_done(wq->fs_info, wtag);
 	}
 	spin_unlock_irqrestore(lock, flags);
 }
@@ -311,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 static void normal_work_helper(struct btrfs_work *work)
 {
 	struct __btrfs_workqueue *wq;
+	void *wtag;
 	int need_order = 0;
 
 	/*
@@ -324,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work)
 	if (work->ordered_func)
 		need_order = 1;
 	wq = work->wq;
+	/* Safe for tracepoints in case work gets freed by the callback */
+	wtag = work;
 
 	trace_btrfs_work_sched(work);
 	thresh_exec_hook(wq);
@@ -333,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work)
 		run_ordered_work(wq);
 	}
 	if (!need_order)
-		trace_btrfs_all_work_done(work);
+		trace_btrfs_all_work_done(wq->fs_info, wtag);
 }
 
 void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index c14bed4ab097..b09225c77676 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1157,22 +1157,26 @@ DECLARE_EVENT_CLASS(btrfs__work,
 		   __entry->func, __entry->ordered_func, __entry->ordered_free)
 );
 
-/* For situiations that the work is freed */
+/*
+ * For situiations when the work is freed, we pass fs_info and a tag that that
+ * matches address of the work structure so it can be paired with the
+ * scheduling event.
+ */
 DECLARE_EVENT_CLASS(btrfs__work__done,
 
-	TP_PROTO(struct btrfs_work *work),
+	TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag),
 
-	TP_ARGS(work),
+	TP_ARGS(fs_info, wtag),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	void *,	work			)
+		__field(	void *,	wtag			)
 	),
 
-	TP_fast_assign_btrfs(btrfs_work_owner(work),
-		__entry->work		= work;
+	TP_fast_assign_btrfs(fs_info,
+		__entry->wtag		= wtag;
 	),
 
-	TP_printk_btrfs("work->%p", __entry->work)
+	TP_printk_btrfs("work->%p", __entry->wtag)
 );
 
 DEFINE_EVENT(btrfs__work, btrfs_work_queued,
@@ -1191,9 +1195,9 @@ DEFINE_EVENT(btrfs__work, btrfs_work_sched,
 
 DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done,
 
-	TP_PROTO(struct btrfs_work *work),
+	TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag),
 
-	TP_ARGS(work)
+	TP_ARGS(fs_info, wtag)
 );
 
 DEFINE_EVENT(btrfs__work, btrfs_ordered_sched,

From 92a1bf76a89ad338f00eb9a2c7689a3907fbcaad Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 17 Nov 2016 15:00:50 -0800
Subject: [PATCH 083/258] Btrfs: add 'inode' for extent map tracepoint

'inode' is an important field for btrfs_get_extent, lets trace it.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c             |  2 +-
 include/trace/events/btrfs.h | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a713d9d324b0..fab189c67eff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7059,7 +7059,7 @@ insert:
 	write_unlock(&em_tree->lock);
 out:
 
-	trace_btrfs_get_extent(root, em);
+	trace_btrfs_get_extent(root, inode, em);
 
 	btrfs_free_path(path);
 	if (trans) {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index b09225c77676..3048f5205363 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -184,14 +184,16 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
 
 TRACE_EVENT_CONDITION(btrfs_get_extent,
 
-	TP_PROTO(struct btrfs_root *root, struct extent_map *map),
+	TP_PROTO(struct btrfs_root *root, struct inode *inode,
+		 struct extent_map *map),
 
-	TP_ARGS(root, map),
+	TP_ARGS(root, inode, map),
 
 	TP_CONDITION(map),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,  root_objectid	)
+		__field(	u64,  ino		)
 		__field(	u64,  start		)
 		__field(	u64,  len		)
 		__field(	u64,  orig_start	)
@@ -204,7 +206,8 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 
 	TP_fast_assign_btrfs(root->fs_info,
 		__entry->root_objectid	= root->root_key.objectid;
-		__entry->start 		= map->start;
+		__entry->ino		= btrfs_ino(inode);
+		__entry->start		= map->start;
 		__entry->len		= map->len;
 		__entry->orig_start	= map->orig_start;
 		__entry->block_start	= map->block_start;
@@ -214,11 +217,12 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->compress_type	= map->compress_type;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu, "
+	TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, "
 		  "orig_start = %llu, block_start = %llu(%s), "
 		  "block_len = %llu, flags = %s, refs = %u, "
 		  "compress_type = %u",
 		  show_root_type(__entry->root_objectid),
+		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len,
 		  (unsigned long long)__entry->orig_start,

From 7856654842bdbebc0fbcbf51573da5d70a787aba Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 30 Nov 2016 16:10:10 -0800
Subject: [PATCH 084/258] Btrfs: add truncated_len for ordered extent
 tracepoints

This can help us monitor truncated ordered extents.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 3048f5205363..2026a89786b0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -263,6 +263,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__field(	int,  compress_type	)
 		__field(	int,  refs		)
 		__field(	u64,  root_objectid	)
+		__field(	u64,  truncated_len	)
 	),
 
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
@@ -277,10 +278,12 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->refs		= atomic_read(&ordered->refs);
 		__entry->root_objectid	=
 				BTRFS_I(inode)->root->root_key.objectid;
+		__entry->truncated_len	= ordered->truncated_len;
 	),
 
 	TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, "
 		  "start = %llu, len = %llu, disk_len = %llu, "
+		  "truncated_len = %llu, "
 		  "bytes_left = %llu, flags = %s, compress_type = %d, "
 		  "refs = %d",
 		  show_root_type(__entry->root_objectid),
@@ -289,6 +292,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len,
 		  (unsigned long long)__entry->disk_len,
+		  (unsigned long long)__entry->truncated_len,
 		  (unsigned long long)__entry->bytes_left,
 		  show_ordered_flags(__entry->flags),
 		  __entry->compress_type, __entry->refs)

From 562a7a07bf61e2949f7cbdb6ac7537ad9e2794d1 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 6 Jan 2017 15:51:36 +0100
Subject: [PATCH 085/258] btrfs: make tracepoint format strings more compact

We've recently added the fsid to trace events, this makes the line quite
long. To reduce the it again, remove extra spaces around = and remove
",".

Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 112 +++++++++++++++++------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 2026a89786b0..88d18a8ceb59 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -130,8 +130,8 @@ DECLARE_EVENT_CLASS(btrfs__inode,
 				BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, "
-		  "disk_i_size = %llu, last_trans = %llu, logged_trans = %llu",
+	TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%lu blocks=%llu "
+		  "disk_i_size=%llu last_trans=%llu logged_trans=%llu",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->generation,
 		  (unsigned long)__entry->ino,
@@ -217,10 +217,10 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->compress_type	= map->compress_type;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, "
-		  "orig_start = %llu, block_start = %llu(%s), "
-		  "block_len = %llu, flags = %s, refs = %u, "
-		  "compress_type = %u",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu "
+		  "orig_start=%llu block_start=%llu(%s) "
+		  "block_len=%llu flags=%s refs=%u "
+		  "compress_type=%u",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->start,
@@ -281,11 +281,11 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->truncated_len	= ordered->truncated_len;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, "
-		  "start = %llu, len = %llu, disk_len = %llu, "
-		  "truncated_len = %llu, "
-		  "bytes_left = %llu, flags = %s, compress_type = %d, "
-		  "refs = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu file_offset=%llu "
+		  "start=%llu len=%llu disk_len=%llu "
+		  "truncated_len=%llu "
+		  "bytes_left=%llu flags=%s compress_type=%d "
+		  "refs=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->file_offset,
@@ -362,10 +362,10 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, "
-		  "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, "
-		  "range_end = %llu, for_kupdate = %d, "
-		  "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu",
+	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu "
+		  "nr_to_write=%ld pages_skipped=%ld range_start=%llu "
+		  "range_end=%llu for_kupdate=%d "
+		  "for_reclaim=%d range_cyclic=%d writeback_index=%lu",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, __entry->index,
 		  __entry->nr_to_write, __entry->pages_skipped,
@@ -408,8 +408,8 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
 			 BTRFS_I(page->mapping->host)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, "
-		  "end = %llu, uptodate = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu start=%llu "
+		  "end=%llu uptodate=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, (unsigned long)__entry->index,
 		  (unsigned long long)__entry->start,
@@ -441,7 +441,7 @@ TRACE_EVENT(btrfs_sync_file,
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%ld parent=%ld datasync=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, (unsigned long)__entry->parent,
 		  __entry->datasync)
@@ -492,9 +492,9 @@ TRACE_EVENT(btrfs_add_block_group,
 		__entry->create		= create;
 	),
 
-	TP_printk("%pU: block_group offset = %llu, size = %llu, "
-		  "flags = %llu(%s), bytes_used = %llu, bytes_super = %llu, "
-		  "create = %d", __entry->fsid,
+	TP_printk("%pU: block_group offset=%llu size=%llu "
+		  "flags=%llu(%s) bytes_used=%llu bytes_super=%llu "
+		  "create=%d", __entry->fsid,
 		  (unsigned long long)__entry->offset,
 		  (unsigned long long)__entry->size,
 		  (unsigned long long)__entry->flags,
@@ -543,9 +543,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref,
 		__entry->seq		= ref->seq;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
-		  "parent = %llu(%s), ref_root = %llu(%s), level = %d, "
-		  "type = %s, seq = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
+		  "parent=%llu(%s) ref_root=%llu(%s) level=%d "
+		  "type=%s seq=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -608,9 +608,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_data_ref,
 		__entry->seq		= ref->seq;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
-		  "parent = %llu(%s), ref_root = %llu(%s), owner = %llu, "
-		  "offset = %llu, type = %s, seq = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
+		  "parent=%llu(%s) ref_root=%llu(%s) owner=%llu "
+		  "offset=%llu type=%s seq=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -665,7 +665,7 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 		__entry->is_data	= head_ref->is_data;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s is_data=%d",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -729,8 +729,8 @@ DECLARE_EVENT_CLASS(btrfs__chunk,
 		__entry->root_objectid	= fs_info->chunk_root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), offset = %llu, size = %llu, "
-		  "num_stripes = %d, sub_stripes = %d, type = %s",
+	TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu "
+		  "num_stripes=%d sub_stripes=%d type=%s",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->offset,
 		  (unsigned long long)__entry->size,
@@ -779,8 +779,8 @@ TRACE_EVENT(btrfs_cow_block,
 		__entry->cow_level	= btrfs_header_level(cow);
 	),
 
-	TP_printk_btrfs("root = %llu(%s), refs = %d, orig_buf = %llu "
-		  "(orig_level = %d), cow_buf = %llu (cow_level = %d)",
+	TP_printk_btrfs("root=%llu(%s) refs=%d orig_buf=%llu "
+		  "(orig_level=%d) cow_buf=%llu (cow_level=%d)",
 		  show_root_type(__entry->root_objectid),
 		  __entry->refs,
 		  (unsigned long long)__entry->buf_start,
@@ -844,7 +844,7 @@ TRACE_EVENT(btrfs_trigger_flush,
 		__assign_str(reason, reason)
 	),
 
-	TP_printk("%pU: %s: flush = %d(%s), flags = %llu(%s), bytes = %llu",
+	TP_printk("%pU: %s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
 		  __entry->fsid, __get_str(reason), __entry->flush,
 		  show_flush_action(__entry->flush),
 		  (unsigned long long)__entry->flags,
@@ -887,8 +887,8 @@ TRACE_EVENT(btrfs_flush_space,
 		__entry->ret		=	ret;
 	),
 
-	TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
-		  "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
+	TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu "
+		  "orig_bytes=%llu ret=%d", __entry->fsid, __entry->state,
 		  show_flush_state(__entry->state),
 		  (unsigned long long)__entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
@@ -913,7 +913,7 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent,
 		__entry->len		= len;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu",
+	TP_printk_btrfs("root=%llu(%s) start=%llu len=%llu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len)
@@ -952,7 +952,7 @@ TRACE_EVENT(find_free_extent,
 		__entry->data		= data;
 	),
 
-	TP_printk_btrfs("root = %Lu(%s), len = %Lu, empty_size = %Lu, flags = %Lu(%s)",
+	TP_printk_btrfs("root=%Lu(%s) len=%Lu empty_size=%Lu flags=%Lu(%s)",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->num_bytes, __entry->empty_size, __entry->data,
 		  __print_flags((unsigned long)__entry->data, "|",
@@ -981,8 +981,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
 		__entry->len		= len;
 	),
 
-	TP_printk_btrfs("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
-		  "start = %Lu, len = %Lu",
+	TP_printk_btrfs("root=%Lu(%s) block_group=%Lu flags=%Lu(%s) "
+		  "start=%Lu len=%Lu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->bg_objectid,
 		  __entry->flags, __print_flags((unsigned long)__entry->flags,
@@ -1033,8 +1033,8 @@ TRACE_EVENT(btrfs_find_cluster,
 		__entry->min_bytes	= min_bytes;
 	),
 
-	TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu,"
-		  " empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid,
+	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) start=%Lu len=%Lu "
+		  "empty_size=%Lu min_bytes=%Lu", __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS), __entry->start,
@@ -1055,7 +1055,7 @@ TRACE_EVENT(btrfs_failed_cluster_setup,
 		__entry->bg_objectid	= block_group->key.objectid;
 	),
 
-	TP_printk_btrfs("block_group = %Lu", __entry->bg_objectid)
+	TP_printk_btrfs("block_group=%Lu", __entry->bg_objectid)
 );
 
 TRACE_EVENT(btrfs_setup_cluster,
@@ -1083,8 +1083,8 @@ TRACE_EVENT(btrfs_setup_cluster,
 		__entry->bitmap		= bitmap;
 	),
 
-	TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, "
-		  "size = %Lu, max_size = %Lu, bitmap = %d",
+	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) window_start=%Lu "
+		  "size=%Lu max_size=%Lu bitmap=%d",
 		  __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
@@ -1111,7 +1111,7 @@ TRACE_EVENT(alloc_extent_state,
 		__entry->ip	= IP
 	),
 
-	TP_printk("state=%p; mask = %s; caller = %pS", __entry->state,
+	TP_printk("state=%p mask=%s caller=%pS", __entry->state,
 		  show_gfp_flags(__entry->mask), (void *)__entry->ip)
 );
 
@@ -1131,7 +1131,7 @@ TRACE_EVENT(free_extent_state,
 		__entry->ip = IP
 	),
 
-	TP_printk(" state=%p; caller = %pS", __entry->state,
+	TP_printk("state=%p caller=%pS", __entry->state,
 		  (void *)__entry->ip)
 );
 
@@ -1159,8 +1159,8 @@ DECLARE_EVENT_CLASS(btrfs__work,
 		__entry->normal_work	= &work->normal_work;
 	),
 
-	TP_printk_btrfs("work=%p (normal_work=%p), wq=%p, func=%pf, ordered_func=%p,"
-		  " ordered_free=%p",
+	TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%pf ordered_func=%p "
+		  "ordered_free=%p",
 		  __entry->work, __entry->normal_work, __entry->wq,
 		   __entry->func, __entry->ordered_func, __entry->ordered_free)
 );
@@ -1233,7 +1233,7 @@ DECLARE_EVENT_CLASS(btrfs__workqueue,
 		__entry->high		= high;
 	),
 
-	TP_printk_btrfs("name=%s%s, wq=%p", __get_str(name),
+	TP_printk_btrfs("name=%s%s wq=%p", __get_str(name),
 		  __print_flags(__entry->high, "",
 				{(WQ_HIGHPRI),	"-high"}),
 		  __entry->wq)
@@ -1288,7 +1288,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_data_map,
 		__entry->free_reserved	=	free_reserved;
 	),
 
-	TP_printk_btrfs("rootid=%llu, ino=%lu, free_reserved=%llu",
+	TP_printk_btrfs("rootid=%llu ino=%lu free_reserved=%llu",
 		  __entry->rootid, __entry->ino, __entry->free_reserved)
 );
 
@@ -1335,7 +1335,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data,
 		__entry->op		= op;
 	),
 
-	TP_printk_btrfs("root=%llu, ino=%lu, start=%llu, len=%llu, reserved=%llu, op=%s",
+	TP_printk_btrfs("root=%llu ino=%lu start=%llu len=%llu reserved=%llu op=%s",
 		  __entry->rootid, __entry->ino, __entry->start, __entry->len,
 		  __entry->reserved,
 		  __print_flags((unsigned long)__entry->op, "",
@@ -1373,7 +1373,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref,
 		__entry->reserved	= reserved;
 	),
 
-	TP_printk_btrfs("root=%llu, reserved=%llu, op=free",
+	TP_printk_btrfs("root=%llu reserved=%llu op=free",
 		  __entry->ref_root, __entry->reserved)
 );
 
@@ -1400,7 +1400,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 		__entry->num_bytes	= rec->num_bytes;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes)
 );
@@ -1442,8 +1442,8 @@ TRACE_EVENT(btrfs_qgroup_account_extent,
 		__entry->nr_new_roots	= nr_new_roots;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, "
-		  "nr_new_roots = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu nr_old_roots=%llu "
+		  "nr_new_roots=%llu",
 		  __entry->bytenr,
 		  __entry->num_bytes,
 		  __entry->nr_old_roots,
@@ -1469,7 +1469,7 @@ TRACE_EVENT(qgroup_update_counters,
 		__entry->cur_new_count	= cur_new_count;
 	),
 
-	TP_printk_btrfs("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu",
+	TP_printk_btrfs("qgid=%llu cur_old_count=%llu cur_new_count=%llu",
 		  __entry->qgid,
 		  __entry->cur_old_count,
 		  __entry->cur_new_count)

From fac69d0efad08fc15e4dbfc116830782acc0dc9a Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Sat, 7 Jan 2017 10:38:31 +0100
Subject: [PATCH 086/258] x86/boot: Add missing declaration of string functions

Add the missing declarations of basic string functions to string.h to allow
a clean build.

Fixes: 5be865661516 ("String-handling functions for the new x86 setup code.")
Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Link: http://lkml.kernel.org/r/1483781911-21399-1-git-send-email-hofrat@osadl.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/string.c | 1 +
 arch/x86/boot/string.h | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index cc3bd583dce1..9e240fcba784 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include "ctype.h"
+#include "string.h"
 
 int memcmp(const void *s1, const void *s2, size_t len)
 {
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 725e820602b1..113588ddb43f 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len);
 #define memset(d,c,l) __builtin_memset(d,c,l)
 #define memcmp	__builtin_memcmp
 
+extern int strcmp(const char *str1, const char *str2);
+extern int strncmp(const char *cs, const char *ct, size_t count);
+extern size_t strlen(const char *s);
+extern char *strstr(const char *s1, const char *s2);
+extern size_t strnlen(const char *s, size_t maxlen);
+extern unsigned int atou(const char *s);
+extern unsigned long long simple_strtoull(const char *cp, char **endp,
+					  unsigned int base);
+
 #endif /* BOOT_STRING_H */

From 02c5c03283c52157d336abf5e44ffcda10579fbf Mon Sep 17 00:00:00 2001
From: Bard Liao <bardliao@realtek.com>
Date: Tue, 27 Dec 2016 12:05:05 +0800
Subject: [PATCH 087/258] ASoC: rt5645: set sel_i2s_pre_div1 to 2

The i2s clock pre-divider 1 is used for both i2s1 and sysclk.
The i2s1 is usually used for the main i2s and the pre-divider
will be set in hw_params function.

However, if i2s2 is used, the pre-divider is not set in the hw_params
function and the default value of i2s clock pre-divider 1 is too high
for sysclk and DMIC usage. Fix by overriding default divider value to 2.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=95681
Tested-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <bardliao@realtek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/rt5645.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 10c2a564a715..1ac96ef9ee20 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3833,6 +3833,9 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
 		}
 	}
 
+	regmap_update_bits(rt5645->regmap, RT5645_ADDA_CLK1,
+		RT5645_I2S_PD1_MASK, RT5645_I2S_PD1_2);
+
 	if (rt5645->pdata.jd_invert) {
 		regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2,
 			RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV);

From 4e2da44691cffbfffb1535f478d19bc2dca3e62b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:10 +0100
Subject: [PATCH 088/258] USB: serial: ch341: fix initial modem-control state

DTR and RTS will be asserted by the tty-layer when the port is opened
and deasserted on close (if HUPCL is set). Make sure the initial state
is not-asserted before the port is first opened as well.

Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 2597b83a8ae2..d133e72fe888 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -258,7 +258,6 @@ static int ch341_port_probe(struct usb_serial_port *port)
 
 	spin_lock_init(&priv->lock);
 	priv->baud_rate = DEFAULT_BAUD_RATE;
-	priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
 
 	r = ch341_configure(port->serial->dev, priv);
 	if (r < 0)

From a20047f36e2f6a1eea4f1fd261aaa55882369868 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:11 +0100
Subject: [PATCH 089/258] USB: serial: ch341: fix open and resume after B0

The private baud_rate variable is used to configure the port at open and
reset-resume and must never be set to (and left at) zero or reset-resume
and all further open attempts will fail.

Fixes: aa91def41a7b ("USB: ch341: set tty baud speed according to tty
struct")
Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index d133e72fe888..6279df905c14 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -355,7 +355,6 @@ static void ch341_set_termios(struct tty_struct *tty,
 
 	baud_rate = tty_get_baud_rate(tty);
 
-	priv->baud_rate = baud_rate;
 	ctrl = CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX;
 
 	switch (C_CSIZE(tty)) {
@@ -388,6 +387,9 @@ static void ch341_set_termios(struct tty_struct *tty,
 		spin_lock_irqsave(&priv->lock, flags);
 		priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS);
 		spin_unlock_irqrestore(&priv->lock, flags);
+
+		priv->baud_rate = baud_rate;
+
 		r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl);
 		if (r < 0 && old_termios) {
 			priv->baud_rate = tty_termios_baud_rate(old_termios);

From 030ee7ae52a46a2be52ccc8242c4a330aba8d38e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:12 +0100
Subject: [PATCH 090/258] USB: serial: ch341: fix modem-control and B0 handling

The modem-control signals are managed by the tty-layer during open and
should not be asserted prematurely when set_termios is called from
driver open.

Also make sure that the signals are asserted only when changing speed
from B0.

Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 6279df905c14..0cc5056b304d 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -384,10 +384,6 @@ static void ch341_set_termios(struct tty_struct *tty,
 		ctrl |= CH341_LCR_STOP_BITS_2;
 
 	if (baud_rate) {
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS);
-		spin_unlock_irqrestore(&priv->lock, flags);
-
 		priv->baud_rate = baud_rate;
 
 		r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl);
@@ -395,14 +391,16 @@ static void ch341_set_termios(struct tty_struct *tty,
 			priv->baud_rate = tty_termios_baud_rate(old_termios);
 			tty_termios_copy_hw(&tty->termios, old_termios);
 		}
-	} else {
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS);
-		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
-	ch341_set_handshake(port->serial->dev, priv->line_control);
+	spin_lock_irqsave(&priv->lock, flags);
+	if (C_BAUD(tty) == B0)
+		priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS);
+	else if (old_termios && (old_termios->c_cflag & CBAUD) == B0)
+		priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS);
+	spin_unlock_irqrestore(&priv->lock, flags);
 
+	ch341_set_handshake(port->serial->dev, priv->line_control);
 }
 
 static void ch341_break_ctl(struct tty_struct *tty, int break_state)

From f2950b78547ffb8475297ada6b92bc2d774d5461 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:13 +0100
Subject: [PATCH 091/258] USB: serial: ch341: fix open error handling

Make sure to stop the interrupt URB before returning on errors during
open.

Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 0cc5056b304d..8f41d4385f1c 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -319,7 +319,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port)
 
 	r = ch341_configure(serial->dev, priv);
 	if (r)
-		goto out;
+		return r;
 
 	if (tty)
 		ch341_set_termios(tty, port, NULL);
@@ -329,12 +329,19 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port)
 	if (r) {
 		dev_err(&port->dev, "%s - failed to submit interrupt urb: %d\n",
 			__func__, r);
-		goto out;
+		return r;
 	}
 
 	r = usb_serial_generic_open(tty, port);
+	if (r)
+		goto err_kill_interrupt_urb;
 
-out:	return r;
+	return 0;
+
+err_kill_interrupt_urb:
+	usb_kill_urb(port->interrupt_in_urb);
+
+	return r;
 }
 
 /* Old_termios contains the original termios settings and

From ce5e292828117d1b71cbd3edf9e9137cf31acd30 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:14 +0100
Subject: [PATCH 092/258] USB: serial: ch341: fix resume after reset

Fix reset-resume handling which failed to resubmit the read and
interrupt URBs, thereby leaving a port that was open before suspend in a
broken state until closed and reopened.

Fixes: 1ded7ea47b88 ("USB: ch341 serial: fix port number changed after
resume")
Fixes: 2bfd1c96a9fb ("USB: serial: ch341: remove reset_resume callback")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 8f41d4385f1c..5343d65f3b52 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -582,14 +582,23 @@ static int ch341_tiocmget(struct tty_struct *tty)
 
 static int ch341_reset_resume(struct usb_serial *serial)
 {
-	struct ch341_private *priv;
-
-	priv = usb_get_serial_port_data(serial->port[0]);
+	struct usb_serial_port *port = serial->port[0];
+	struct ch341_private *priv = usb_get_serial_port_data(port);
+	int ret;
 
 	/* reconfigure ch341 serial port after bus-reset */
 	ch341_configure(serial->dev, priv);
 
-	return 0;
+	if (tty_port_initialized(&port->port)) {
+		ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO);
+		if (ret) {
+			dev_err(&port->dev, "failed to submit interrupt urb: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return usb_serial_generic_resume(serial);
 }
 
 static struct usb_serial_driver ch341_device = {

From 3cca8624b6624e7ffb87dcd8a0a05bef9b50e97b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:15 +0100
Subject: [PATCH 093/258] USB: serial: ch341: fix line settings after
 reset-resume

A recent change added support for modifying the default line-control
settings, but did not make sure that the modified settings were used as
part of reconfiguration after a device has been reset during resume.

This caused a port that was open before suspend to be unusable until
being closed and reopened.

Fixes: ba781bdf8662 ("USB: serial: ch341: add support for parity, frame
length, stop bits")
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 5343d65f3b52..eabdd05a2147 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -95,6 +95,7 @@ struct ch341_private {
 	unsigned baud_rate; /* set baud rate */
 	u8 line_control; /* set line control value RTS/DTR */
 	u8 line_status; /* active status of modem control inputs */
+	u8 lcr;
 };
 
 static void ch341_set_termios(struct tty_struct *tty,
@@ -232,7 +233,7 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv)
 	if (r < 0)
 		goto out;
 
-	r = ch341_init_set_baudrate(dev, priv, 0);
+	r = ch341_init_set_baudrate(dev, priv, priv->lcr);
 	if (r < 0)
 		goto out;
 
@@ -397,6 +398,8 @@ static void ch341_set_termios(struct tty_struct *tty,
 		if (r < 0 && old_termios) {
 			priv->baud_rate = tty_termios_baud_rate(old_termios);
 			tty_termios_copy_hw(&tty->termios, old_termios);
+		} else if (r == 0) {
+			priv->lcr = ctrl;
 		}
 	}
 

From 55fa15b5987db22b4f35d3f0798928c126be5f1c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:16 +0100
Subject: [PATCH 094/258] USB: serial: ch341: fix baud rate and line-control
 handling

Revert to using direct register writes to set the divisor and
line-control registers.

A recent change switched to using the init vendor command to update
these registers, something which also enabled support for CH341A
devices. It turns out that simply setting bit 7 in the divisor register
is sufficient to support CH341A and specifically prevent data from being
buffered until a full endpoint-size packet (32 bytes) has been received.

Using the init command also had the side-effect of temporarily
deasserting the DTR/RTS signals on every termios change (including
initialisation on open) something which for example could cause problems
in setups where DTR is used to trigger a reset.

Fixes: 4e46c410e050 ("USB: serial: ch341: reinitialize chip on
reconfiguration")
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index eabdd05a2147..8d7b0847109b 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -133,8 +133,8 @@ static int ch341_control_in(struct usb_device *dev,
 	return r;
 }
 
-static int ch341_init_set_baudrate(struct usb_device *dev,
-				   struct ch341_private *priv, unsigned ctrl)
+static int ch341_set_baudrate_lcr(struct usb_device *dev,
+				  struct ch341_private *priv, u8 lcr)
 {
 	short a;
 	int r;
@@ -157,9 +157,19 @@ static int ch341_init_set_baudrate(struct usb_device *dev,
 	factor = 0x10000 - factor;
 	a = (factor & 0xff00) | divisor;
 
-	/* 0x9c is "enable SFR_UART Control register and timer" */
-	r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT,
-			      0x9c | (ctrl << 8), a | 0x80);
+	/*
+	 * CH341A buffers data until a full endpoint-size packet (32 bytes)
+	 * has been received unless bit 7 is set.
+	 */
+	a |= BIT(7);
+
+	r = ch341_control_out(dev, CH341_REQ_WRITE_REG, 0x1312, a);
+	if (r)
+		return r;
+
+	r = ch341_control_out(dev, CH341_REQ_WRITE_REG, 0x2518, lcr);
+	if (r)
+		return r;
 
 	return r;
 }
@@ -233,7 +243,7 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv)
 	if (r < 0)
 		goto out;
 
-	r = ch341_init_set_baudrate(dev, priv, priv->lcr);
+	r = ch341_set_baudrate_lcr(dev, priv, priv->lcr);
 	if (r < 0)
 		goto out;
 
@@ -394,7 +404,7 @@ static void ch341_set_termios(struct tty_struct *tty,
 	if (baud_rate) {
 		priv->baud_rate = baud_rate;
 
-		r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl);
+		r = ch341_set_baudrate_lcr(port->serial->dev, priv, ctrl);
 		if (r < 0 && old_termios) {
 			priv->baud_rate = tty_termios_baud_rate(old_termios);
 			tty_termios_copy_hw(&tty->termios, old_termios);

From 5149fd327f16e393c1d04fa5325ab072c32472bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:36:30 -0800
Subject: [PATCH 095/258] xfs: bump up reserved blocks in xfs_alloc_set_aside

Setting aside 4 blocks globally for bmbt splits isn't all that useful,
as different threads can allocate space in parallel.  Bump it to 4
blocks per AG to allow each thread that is currently doing an
allocation to dip into it separately.  Without that we may no have
enough reserved blocks if there are enough parallel transactions
in an almost out space file system that all run into bmap btree
splits.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 5050056a0b06..0a46f8488b8d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -95,10 +95,7 @@ unsigned int
 xfs_alloc_set_aside(
 	struct xfs_mount	*mp)
 {
-	unsigned int		blocks;
-
-	blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
-	return blocks;
+	return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
 }
 
 /*

From 255c516278175a6dc7037d1406307f35237d8688 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:36:19 -0800
Subject: [PATCH 096/258] xfs: fix bogus minleft manipulations

We can't just set minleft to 0 when we're low on space - that's exactly
what we need minleft for: to protect space in the AG for btree block
allocations when we are low on free space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c      | 24 +++++++-----------------
 fs/xfs/libxfs/xfs_bmap.c       |  3 ---
 fs/xfs/libxfs/xfs_bmap_btree.c |  3 +--
 3 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0a46f8488b8d..fe925702c955 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2635,12 +2635,10 @@ xfs_alloc_vextent(
 	xfs_agblock_t	agsize;	/* allocation group size */
 	int		error;
 	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
-	xfs_extlen_t	minleft;/* minimum left value, temp copy */
 	xfs_mount_t	*mp;	/* mount structure pointer */
 	xfs_agnumber_t	sagno;	/* starting allocation group number */
 	xfs_alloctype_t	type;	/* input allocation type */
 	int		bump_rotor = 0;
-	int		no_min = 0;
 	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */
 
 	mp = args->mp;
@@ -2669,7 +2667,6 @@ xfs_alloc_vextent(
 		trace_xfs_alloc_vextent_badargs(args);
 		return 0;
 	}
-	minleft = args->minleft;
 
 	switch (type) {
 	case XFS_ALLOCTYPE_THIS_AG:
@@ -2680,9 +2677,7 @@ xfs_alloc_vextent(
 		 */
 		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
 		args->pag = xfs_perag_get(mp, args->agno);
-		args->minleft = 0;
 		error = xfs_alloc_fix_freelist(args, 0);
-		args->minleft = minleft;
 		if (error) {
 			trace_xfs_alloc_vextent_nofix(args);
 			goto error0;
@@ -2747,9 +2742,7 @@ xfs_alloc_vextent(
 		 */
 		for (;;) {
 			args->pag = xfs_perag_get(mp, args->agno);
-			if (no_min) args->minleft = 0;
 			error = xfs_alloc_fix_freelist(args, flags);
-			args->minleft = minleft;
 			if (error) {
 				trace_xfs_alloc_vextent_nofix(args);
 				goto error0;
@@ -2789,20 +2782,17 @@ xfs_alloc_vextent(
 			 * or switch to non-trylock mode.
 			 */
 			if (args->agno == sagno) {
-				if (no_min == 1) {
+				if (flags == 0) {
 					args->agbno = NULLAGBLOCK;
 					trace_xfs_alloc_vextent_allfailed(args);
 					break;
 				}
-				if (flags == 0) {
-					no_min = 1;
-				} else {
-					flags = 0;
-					if (type == XFS_ALLOCTYPE_START_BNO) {
-						args->agbno = XFS_FSB_TO_AGBNO(mp,
-							args->fsbno);
-						args->type = XFS_ALLOCTYPE_NEAR_BNO;
-					}
+
+				flags = 0;
+				if (type == XFS_ALLOCTYPE_START_BNO) {
+					args->agbno = XFS_FSB_TO_AGBNO(mp,
+						args->fsbno);
+					args->type = XFS_ALLOCTYPE_NEAR_BNO;
 				}
 			}
 			xfs_perag_put(args->pag);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2760bc3b2536..44773c9eb957 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3812,7 +3812,6 @@ xfs_bmap_btalloc(
 		args.fsbno = 0;
 		args.type = XFS_ALLOCTYPE_FIRST_AG;
 		args.total = ap->minlen;
-		args.minleft = 0;
 		if ((error = xfs_alloc_vextent(&args)))
 			return error;
 		ap->dfops->dop_low = true;
@@ -4344,8 +4343,6 @@ xfs_bmapi_allocate(
 	if (error)
 		return error;
 
-	if (bma->dfops->dop_low)
-		bma->minleft = 0;
 	if (bma->cur)
 		bma->cur->bc_private.b.firstblock = *bma->firstblock;
 	if (bma->blkno == NULLFSBLOCK)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index d6330c297ca0..d9be241fc86f 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -502,12 +502,11 @@ try_another_ag:
 	if (args.fsbno == NULLFSBLOCK && args.minleft) {
 		/*
 		 * Could not find an AG with enough free space to satisfy
-		 * a full btree split.  Try again without minleft and if
+		 * a full btree split.  Try again and if
 		 * successful activate the lowspace algorithm.
 		 */
 		args.fsbno = 0;
 		args.type = XFS_ALLOCTYPE_FIRST_AG;
-		args.minleft = 0;
 		error = xfs_alloc_vextent(&args);
 		if (error)
 			goto error0;

From 54fee133ad59c87ab01dd84ab3e9397134b32acb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:44:30 -0800
Subject: [PATCH 097/258] xfs: adjust allocation length in
 xfs_alloc_space_available

We must decide in xfs_alloc_fix_freelist if we can perform an
allocation from a given AG is possible or not based on the available
space, and should not fail the allocation past that point on a
healthy file system.

But currently we have two additional places that second-guess
xfs_alloc_fix_freelist: xfs_alloc_ag_vextent tries to adjust the
maxlen parameter to remove the reservation before doing the
allocation (but ignores the various minium freespace requirements),
and xfs_alloc_fix_minleft tries to fix up the allocated length
after we've found an extent, but ignores the reservations and also
doesn't take the AGFL into account (and thus fails allocations
for not matching minlen in some cases).

Remove all these later fixups and just correct the maxlen argument
inside xfs_alloc_fix_freelist once we have the AGF buffer locked.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 81 ++++++++-------------------------------
 fs/xfs/libxfs/xfs_alloc.h |  2 +-
 2 files changed, 18 insertions(+), 65 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index fe925702c955..f2e7eb6e5243 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -362,35 +362,11 @@ xfs_alloc_fix_len(
 		return;
 	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 	ASSERT(rlen % args->prod == args->mod);
+	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
+		rlen + args->minleft);
 	args->len = rlen;
 }
 
-/*
- * Fix up length if there is too little space left in the a.g.
- * Return 1 if ok, 0 if too little, should give up.
- */
-STATIC int
-xfs_alloc_fix_minleft(
-	xfs_alloc_arg_t	*args)		/* allocation argument structure */
-{
-	xfs_agf_t	*agf;		/* a.g. freelist header */
-	int		diff;		/* free space difference */
-
-	if (args->minleft == 0)
-		return 1;
-	agf = XFS_BUF_TO_AGF(args->agbp);
-	diff = be32_to_cpu(agf->agf_freeblks)
-		- args->len - args->minleft;
-	if (diff >= 0)
-		return 1;
-	args->len += diff;		/* shrink the allocated space */
-	/* casts to (int) catch length underflows */
-	if ((int)args->len >= (int)args->minlen)
-		return 1;
-	args->agbno = NULLAGBLOCK;
-	return 0;
-}
-
 /*
  * Update the two btrees, logically removing from freespace the extent
  * starting at rbno, rlen blocks.  The extent is contained within the
@@ -686,8 +662,6 @@ xfs_alloc_ag_vextent(
 	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
 {
 	int		error=0;
-	xfs_extlen_t	reservation;
-	xfs_extlen_t	oldmax;
 
 	ASSERT(args->minlen > 0);
 	ASSERT(args->maxlen > 0);
@@ -695,20 +669,6 @@ xfs_alloc_ag_vextent(
 	ASSERT(args->mod < args->prod);
 	ASSERT(args->alignment > 0);
 
-	/*
-	 * Clamp maxlen to the amount of free space minus any reservations
-	 * that have been made.
-	 */
-	oldmax = args->maxlen;
-	reservation = xfs_ag_resv_needed(args->pag, args->resv);
-	if (args->maxlen > args->pag->pagf_freeblks - reservation)
-		args->maxlen = args->pag->pagf_freeblks - reservation;
-	if (args->maxlen == 0) {
-		args->agbno = NULLAGBLOCK;
-		args->maxlen = oldmax;
-		return 0;
-	}
-
 	/*
 	 * Branch to correct routine based on the type.
 	 */
@@ -728,8 +688,6 @@ xfs_alloc_ag_vextent(
 		/* NOTREACHED */
 	}
 
-	args->maxlen = oldmax;
-
 	if (error || args->agbno == NULLAGBLOCK)
 		return error;
 
@@ -838,9 +796,6 @@ xfs_alloc_ag_vextent_exact(
 	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 						- args->agbno;
 	xfs_alloc_fix_len(args);
-	if (!xfs_alloc_fix_minleft(args))
-		goto not_found;
-
 	ASSERT(args->agbno + args->len <= tend);
 
 	/*
@@ -1146,12 +1101,7 @@ restart:
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 		args->len = blen;
-		if (!xfs_alloc_fix_minleft(args)) {
-			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-			trace_xfs_alloc_near_nominleft(args);
-			return 0;
-		}
-		blen = args->len;
+
 		/*
 		 * We are allocating starting at bnew for blen blocks.
 		 */
@@ -1343,12 +1293,6 @@ restart:
 	 */
 	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 	xfs_alloc_fix_len(args);
-	if (!xfs_alloc_fix_minleft(args)) {
-		trace_xfs_alloc_near_nominleft(args);
-		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
-		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-		return 0;
-	}
 	rlen = args->len;
 	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
 				     args->datatype, ltbnoa, ltlena, &ltnew);
@@ -1550,8 +1494,6 @@ restart:
 	}
 	xfs_alloc_fix_len(args);
 
-	if (!xfs_alloc_fix_minleft(args))
-		goto out_nominleft;
 	rlen = args->len;
 	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
 	/*
@@ -2070,10 +2012,20 @@ xfs_alloc_space_available(
 
 	/* do we have enough free space remaining for the allocation? */
 	available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
-			  reservation - min_free - args->total);
-	if (available < (int)args->minleft || available <= 0)
+			  reservation - min_free - args->minleft);
+	if (available < (int)args->total)
 		return false;
 
+	/*
+	 * Clamp maxlen to the amount of free space available for the actual
+	 * extent allocation.
+	 */
+	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
+		args->maxlen = available;
+		ASSERT(args->maxlen > 0);
+		ASSERT(args->maxlen >= args->minlen);
+	}
+
 	return true;
 }
 
@@ -2119,7 +2071,8 @@ xfs_alloc_fix_freelist(
 	}
 
 	need = xfs_alloc_min_freelist(mp, pag);
-	if (!xfs_alloc_space_available(args, need, flags))
+	if (!xfs_alloc_space_available(args, need, flags |
+			XFS_ALLOC_FLAG_CHECK))
 		goto out_agbp_relse;
 
 	/*
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 7c404a6b0ae3..1d0f48a501a3 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t;
 #define	XFS_ALLOC_FLAG_FREEING	0x00000002  /* indicate caller is freeing extents*/
 #define	XFS_ALLOC_FLAG_NORMAP	0x00000004  /* don't modify the rmapbt */
 #define	XFS_ALLOC_FLAG_NOSHRINK	0x00000008  /* don't shrink the freelist */
-
+#define	XFS_ALLOC_FLAG_CHECK	0x00000010  /* test only, don't modify args */
 
 /*
  * Argument structure for xfs_alloc routines.

From 12ef830198b0d71668eb9b59f9ba69d32951a48a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:39:35 -0800
Subject: [PATCH 098/258] xfs: don't rely on ->total in
 xfs_alloc_space_available

->total is a bit of an odd parameter passed down to the low-level
allocator all the way from the high-level callers.  It's supposed to
contain the maximum number of blocks to be allocated for the whole
transaction [1].

But in xfs_iomap_write_allocate we only convert existing delayed
allocations and thus only have a minimal block reservation for the
current transaction, so xfs_alloc_space_available can't use it for
the allocation decisions.  Use the maximum of args->total and the
calculated block requirement to make a decision.  We probably should
get rid of args->total eventually and instead apply ->minleft more
broadly, but that will require some extensive changes all over.

[1] which creates lots of confusion as most callers don't decrement it
once doing a first allocation.  But that's for a separate series.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index f2e7eb6e5243..9f06a211e157 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1995,7 +1995,7 @@ xfs_alloc_space_available(
 	int			flags)
 {
 	struct xfs_perag	*pag = args->pag;
-	xfs_extlen_t		longest;
+	xfs_extlen_t		alloc_len, longest;
 	xfs_extlen_t		reservation; /* blocks that are still reserved */
 	int			available;
 
@@ -2005,15 +2005,16 @@ xfs_alloc_space_available(
 	reservation = xfs_ag_resv_needed(pag, args->resv);
 
 	/* do we have enough contiguous free space for the allocation? */
+	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
 	longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
 			reservation);
-	if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
+	if (longest < alloc_len)
 		return false;
 
 	/* do we have enough free space remaining for the allocation? */
 	available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
 			  reservation - min_free - args->minleft);
-	if (available < (int)args->total)
+	if (available < (int)max(args->total, alloc_len))
 		return false;
 
 	/*

From 84a4620cfe97c9d57e39b2369bfb77faff55063d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:41:33 -0800
Subject: [PATCH 099/258] xfs: don't print warnings when xfs_log_force fails

There are only two reasons for xfs_log_force / xfs_log_force_lsn to fail:
one is an I/O error, for which xlog_bdstrat already logs a warning, and
the second is an already shutdown log due to a previous I/O errors.  In
the latter case we'll already have a previous indication for the actual
error, but the large stream of misleading warnings from xfs_log_force
will probably scroll it out of the message buffer.

Simply removing the warnings thus makes the XFS log reporting significantly
better.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c39ac14ff540..b1469f0a91a6 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3317,12 +3317,8 @@ xfs_log_force(
 	xfs_mount_t	*mp,
 	uint		flags)
 {
-	int	error;
-
 	trace_xfs_log_force(mp, 0, _RET_IP_);
-	error = _xfs_log_force(mp, flags, NULL);
-	if (error)
-		xfs_warn(mp, "%s: error %d returned.", __func__, error);
+	_xfs_log_force(mp, flags, NULL);
 }
 
 /*
@@ -3466,12 +3462,8 @@ xfs_log_force_lsn(
 	xfs_lsn_t	lsn,
 	uint		flags)
 {
-	int	error;
-
 	trace_xfs_log_force(mp, lsn, _RET_IP_);
-	error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
-	if (error)
-		xfs_warn(mp, "%s: error %d returned.", __func__, error);
+	_xfs_log_force_lsn(mp, lsn, flags, NULL);
 }
 
 /*

From 32cd7cbbacf700885a2316275f188f2d5739b5f4 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <Jes.Sorensen@redhat.com>
Date: Fri, 6 Jan 2017 19:31:35 -0500
Subject: [PATCH 100/258] md/raid5: Use correct IS_ERR() variation on pointer
 check

This fixes a build error on certain architectures, such as ppc64.

Fixes: 6995f0b247e("md: takeover should clear unrelated bits")
Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7b1da6e95a56..36c13e4be9c9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7831,7 +7831,7 @@ static void *raid5_takeover_raid1(struct mddev *mddev)
 	mddev->new_chunk_sectors = chunksect;
 
 	ret = setup_conf(mddev);
-	if (!IS_ERR_VALUE(ret))
+	if (!IS_ERR(ret))
 		mddev_clear_unsupported_flags(mddev,
 			UNSUPPORTED_MDDEV_FLAGS);
 	return ret;

From 5dedade6dfa243c130b85d1e4daba6f027805033 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 9 Jan 2017 12:41:43 +0100
Subject: [PATCH 101/258] x86/CPU: Add native CPUID variants returning a single
 datum

... similarly to the cpuid_<reg>() variants.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170109114147.5082-2-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/processor.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index eaf100508c36..1be64da0384e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -219,6 +219,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 	    : "memory");
 }
 
+#define native_cpuid_reg(reg)					\
+static inline unsigned int native_cpuid_##reg(unsigned int op)	\
+{								\
+	unsigned int eax = op, ebx, ecx = 0, edx;		\
+								\
+	native_cpuid(&eax, &ebx, &ecx, &edx);			\
+								\
+	return reg;						\
+}
+
+/*
+ * Native CPUID functions returning a single datum.
+ */
+native_cpuid_reg(eax)
+native_cpuid_reg(ebx)
+native_cpuid_reg(ecx)
+native_cpuid_reg(edx)
+
 static inline void load_cr3(pgd_t *pgdir)
 {
 	write_cr3(__pa(pgdir));

From f3e2a51f568d9f33370f4e8bb05669a34223241a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 9 Jan 2017 12:41:44 +0100
Subject: [PATCH 102/258] x86/microcode: Use native CPUID to tickle out
 microcode revision

Intel supplies the microcode revision value in MSR 0x8b
(IA32_BIOS_SIGN_ID) after CPUID(1) has been executed. Execute it each
time before reading that MSR.

It used to do sync_core() which did do CPUID but

  c198b121b1a1 ("x86/asm: Rewrite sync_core() to use IRET-to-self")

changed the sync_core() implementation so we better make the microcode
loading case explicit, as the SDM documents it.

Reported-and-tested-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170109114147.5082-3-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel.c           |  2 +-
 arch/x86/kernel/cpu/microcode/intel.c | 26 +++-----------------------
 2 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcd484d2bb03..2d49aa949fa1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -83,7 +83,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 
 		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 		/* Required by the SDM */
-		sync_core();
+		native_cpuid_eax(1);
 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
 	}
 
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index b624b54912e1..f79249fab389 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -368,26 +368,6 @@ next:
 	return patch;
 }
 
-static void cpuid_1(void)
-{
-	/*
-	 * According to the Intel SDM, Volume 3, 9.11.7:
-	 *
-	 *   CPUID returns a value in a model specific register in
-	 *   addition to its usual register return values. The
-	 *   semantics of CPUID cause it to deposit an update ID value
-	 *   in the 64-bit model-specific register at address 08BH
-	 *   (IA32_BIOS_SIGN_ID). If no update is present in the
-	 *   processor, the value in the MSR remains unmodified.
-	 *
-	 * Use native_cpuid -- this code runs very early and we don't
-	 * want to mess with paravirt.
-	 */
-	unsigned int eax = 1, ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-}
-
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
@@ -413,7 +393,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
+	native_cpuid_eax(1);
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -613,7 +593,7 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
+	native_cpuid_eax(1);
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -825,7 +805,7 @@ static int apply_microcode_intel(int cpu)
 	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
+	native_cpuid_eax(1);
 
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);

From 4167709bbf826512a52ebd6aafda2be104adaec9 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 9 Jan 2017 12:41:45 +0100
Subject: [PATCH 103/258] x86/microcode/intel: Add a helper which gives the
 microcode revision

Since on Intel we're required to do CPUID(1) first, before reading
the microcode revision MSR, let's add a special helper which does the
required steps so that we don't forget to do them next time, when we
want to read the microcode revision.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170109114147.5082-4-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/microcode_intel.h | 15 +++++++++
 arch/x86/kernel/cpu/intel.c            | 11 ++-----
 arch/x86/kernel/cpu/microcode/intel.c  | 43 ++++++++------------------
 3 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 195becc6f780..e793fc9a9b20 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -52,6 +52,21 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
+static inline u32 intel_get_microcode_revision(void)
+{
+	u32 rev, dummy;
+
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	native_cpuid_eax(1);
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
+
+	return rev;
+}
+
 #ifdef CONFIG_MICROCODE_INTEL
 extern void __init load_ucode_intel_bsp(void);
 extern void load_ucode_intel_ap(void);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2d49aa949fa1..203f860d2ab3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -14,6 +14,7 @@
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/intel-family.h>
+#include <asm/microcode_intel.h>
 
 #ifdef CONFIG_X86_64
 #include <linux/topology.h>
@@ -78,14 +79,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
-	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
-		unsigned lower_word;
-
-		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-		/* Required by the SDM */
-		native_cpuid_eax(1);
-		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
-	}
+	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
+		c->microcode = intel_get_microcode_revision();
 
 	/*
 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f79249fab389..faec8fa68ffd 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -390,15 +390,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
 	}
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	native_cpuid_eax(1);
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	csig.rev = val[1];
+	csig.rev = intel_get_microcode_revision();
 
 	uci->cpu_sig = csig;
 	uci->valid = 1;
@@ -582,7 +575,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci)
 static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 {
 	struct microcode_intel *mc;
-	unsigned int val[2];
+	u32 rev;
 
 	mc = uci->mc;
 	if (!mc)
@@ -590,21 +583,16 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 
 	/* write microcode via MSR 0x79 */
 	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	native_cpuid_eax(1);
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc->hdr.rev)
+	rev = intel_get_microcode_revision();
+	if (rev != mc->hdr.rev)
 		return -1;
 
 #ifdef CONFIG_X86_64
 	/* Flush global tlb. This is precaution. */
 	flush_tlb_early();
 #endif
-	uci->cpu_sig.rev = val[1];
+	uci->cpu_sig.rev = rev;
 
 	if (early)
 		print_ucode(uci);
@@ -784,8 +772,8 @@ static int apply_microcode_intel(int cpu)
 	struct microcode_intel *mc;
 	struct ucode_cpu_info *uci;
 	struct cpuinfo_x86 *c;
-	unsigned int val[2];
 	static int prev_rev;
+	u32 rev;
 
 	/* We should bind the task to the CPU */
 	if (WARN_ON(raw_smp_processor_id() != cpu))
@@ -802,33 +790,28 @@ static int apply_microcode_intel(int cpu)
 
 	/* write microcode via MSR 0x79 */
 	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	native_cpuid_eax(1);
+	rev = intel_get_microcode_revision();
 
-	/* get the current revision from MSR 0x8B */
-	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	if (val[1] != mc->hdr.rev) {
+	if (rev != mc->hdr.rev) {
 		pr_err("CPU%d update to revision 0x%x failed\n",
 		       cpu, mc->hdr.rev);
 		return -1;
 	}
 
-	if (val[1] != prev_rev) {
+	if (rev != prev_rev) {
 		pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
-			val[1],
+			rev,
 			mc->hdr.date & 0xffff,
 			mc->hdr.date >> 24,
 			(mc->hdr.date >> 16) & 0xff);
-		prev_rev = val[1];
+		prev_rev = rev;
 	}
 
 	c = &cpu_data(cpu);
 
-	uci->cpu_sig.rev = val[1];
-	c->microcode = val[1];
+	uci->cpu_sig.rev = rev;
+	c->microcode = rev;
 
 	return 0;
 }

From 9fcf5ba2ef908af916e9002891fbbca20ce4dc98 Mon Sep 17 00:00:00 2001
From: Junichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 9 Jan 2017 12:41:46 +0100
Subject: [PATCH 104/258] x86/microcode/intel: Fix allocation size of struct
 ucode_patch

We allocate struct ucode_patch here. @size is the size of microcode data
and used for kmemdup() later in this function.

Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading")
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/7a730dc9-ac17-35c4-fe76-dfc94e5ecd95@ce.jp.nec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/microcode/intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index faec8fa68ffd..943486589757 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -150,7 +150,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
 {
 	struct ucode_patch *p;
 
-	p = kzalloc(size, GFP_KERNEL);
+	p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 

From 2e86222c67bb5d942da68e8415749b32db208534 Mon Sep 17 00:00:00 2001
From: Junichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 9 Jan 2017 12:41:47 +0100
Subject: [PATCH 105/258] x86/microcode/intel: Use correct buffer size for
 saving microcode data

In generic_load_microcode(), curr_mc_size is the size of the last
allocated buffer and since we have this performance "optimization"
there to vmalloc a new buffer only when the current one is bigger,
curr_mc_size ends up becoming the size of the biggest buffer we've seen
so far.

However, we end up saving the microcode patch which matches our CPU
and its size is not curr_mc_size but the respective mc_size during the
iteration while we're staring at it.

So save that mc_size into a separate variable and use it to store the
previously found microcode buffer.

Without this fix, we could get oops like this:

  BUG: unable to handle kernel paging request at ffffc9000e30f000
  IP: __memcpy+0x12/0x20
  ...
  Call Trace:
  ? kmemdup+0x43/0x60
  __alloc_microcode_buf+0x44/0x70
  save_microcode_patch+0xd4/0x150
  generic_load_microcode+0x1b8/0x260
  request_microcode_user+0x15/0x20
  microcode_write+0x91/0x100
  __vfs_write+0x34/0x120
  vfs_write+0xc1/0x130
  SyS_write+0x56/0xc0
  do_syscall_64+0x6c/0x160
  entry_SYSCALL64_slow_path+0x25/0x25

Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading")
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/4f33cbfd-44f2-9bed-3b66-7446cd14256f@ce.jp.nec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/microcode/intel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 943486589757..3f329b74e040 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -823,7 +823,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover = size;
-	unsigned int curr_mc_size = 0;
+	unsigned int curr_mc_size = 0, new_mc_size = 0;
 	unsigned int csig, cpf;
 
 	while (leftover) {
@@ -864,6 +864,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
+			new_mc_size = mc_size;
 			mc = NULL;	/* trigger new vmalloc */
 		}
 
@@ -889,7 +890,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	 * permanent memory. So it will be loaded early when a CPU is hot added
 	 * or resumes.
 	 */
-	save_mc_for_early(new_mc, curr_mc_size);
+	save_mc_for_early(new_mc, new_mc_size);
 
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);

From 3895dbf8985f656675b5bde610723a29cbce3fa7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Jan 2017 14:18:43 +1300
Subject: [PATCH 106/258] mnt: Protect the mountpoint hashtable with mount_lock

Protecting the mountpoint hashtable with namespace_sem was sufficient
until a call to umount_mnt was added to mntput_no_expire.  At which
point it became possible for multiple calls of put_mountpoint on
the same hash chain to happen on the same time.

Kristen Johansen <kjlx@templeofstupid.com> reported:
> This can cause a panic when simultaneous callers of put_mountpoint
> attempt to free the same mountpoint.  This occurs because some callers
> hold the mount_hash_lock, while others hold the namespace lock.  Some
> even hold both.
>
> In this submitter's case, the panic manifested itself as a GP fault in
> put_mountpoint() when it called hlist_del() and attempted to dereference
> a m_hash.pprev that had been poisioned by another thread.

Al Viro observed that the simple fix is to switch from using the namespace_sem
to the mount_lock to protect the mountpoint hash table.

I have taken Al's suggested patch moved put_mountpoint in pivot_root
(instead of taking mount_lock an additional time), and have replaced
new_mountpoint with get_mountpoint a function that does the hash table
lookup and addition under the mount_lock.   The introduction of get_mounptoint
ensures that only the mount_lock is needed to manipulate the mountpoint
hashtable.

d_set_mounted is modified to only set DCACHE_MOUNTED if it is not
already set.  This allows get_mountpoint to use the setting of
DCACHE_MOUNTED to ensure adding a struct mountpoint for a dentry
happens exactly once.

Cc: stable@vger.kernel.org
Fixes: ce07d891a089 ("mnt: Honor MNT_LOCKED when detaching mounts")
Reported-by: Krister Johansen <kjlx@templeofstupid.com>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/dcache.c    |  7 ++++--
 fs/namespace.c | 68 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 769903dbc19d..95d71eda8142 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1336,8 +1336,11 @@ int d_set_mounted(struct dentry *dentry)
 	}
 	spin_lock(&dentry->d_lock);
 	if (!d_unlinked(dentry)) {
-		dentry->d_flags |= DCACHE_MOUNTED;
-		ret = 0;
+		ret = -EBUSY;
+		if (!d_mountpoint(dentry)) {
+			dentry->d_flags |= DCACHE_MOUNTED;
+			ret = 0;
+		}
 	}
  	spin_unlock(&dentry->d_lock);
 out:
diff --git a/fs/namespace.c b/fs/namespace.c
index b5b1259e064f..487ba30bb5c6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -742,26 +742,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 	return NULL;
 }
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
 {
-	struct hlist_head *chain = mp_hash(dentry);
-	struct mountpoint *mp;
+	struct mountpoint *mp, *new = NULL;
 	int ret;
 
-	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-	if (!mp)
-		return ERR_PTR(-ENOMEM);
-
-	ret = d_set_mounted(dentry);
-	if (ret) {
-		kfree(mp);
-		return ERR_PTR(ret);
+	if (d_mountpoint(dentry)) {
+mountpoint:
+		read_seqlock_excl(&mount_lock);
+		mp = lookup_mountpoint(dentry);
+		read_sequnlock_excl(&mount_lock);
+		if (mp)
+			goto done;
 	}
 
-	mp->m_dentry = dentry;
-	mp->m_count = 1;
-	hlist_add_head(&mp->m_hash, chain);
-	INIT_HLIST_HEAD(&mp->m_list);
+	if (!new)
+		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+
+
+	/* Exactly one processes may set d_mounted */
+	ret = d_set_mounted(dentry);
+
+	/* Someone else set d_mounted? */
+	if (ret == -EBUSY)
+		goto mountpoint;
+
+	/* The dentry is not available as a mountpoint? */
+	mp = ERR_PTR(ret);
+	if (ret)
+		goto done;
+
+	/* Add the new mountpoint to the hash table */
+	read_seqlock_excl(&mount_lock);
+	new->m_dentry = dentry;
+	new->m_count = 1;
+	hlist_add_head(&new->m_hash, mp_hash(dentry));
+	INIT_HLIST_HEAD(&new->m_list);
+	read_sequnlock_excl(&mount_lock);
+
+	mp = new;
+	new = NULL;
+done:
+	kfree(new);
 	return mp;
 }
 
@@ -1595,11 +1619,11 @@ void __detach_mounts(struct dentry *dentry)
 	struct mount *mnt;
 
 	namespace_lock();
+	lock_mount_hash();
 	mp = lookup_mountpoint(dentry);
 	if (IS_ERR_OR_NULL(mp))
 		goto out_unlock;
 
-	lock_mount_hash();
 	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
@@ -1609,9 +1633,9 @@ void __detach_mounts(struct dentry *dentry)
 		}
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
-	unlock_mount_hash();
 	put_mountpoint(mp);
 out_unlock:
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -2038,9 +2062,7 @@ retry:
 	namespace_lock();
 	mnt = lookup_mnt(path);
 	if (likely(!mnt)) {
-		struct mountpoint *mp = lookup_mountpoint(dentry);
-		if (!mp)
-			mp = new_mountpoint(dentry);
+		struct mountpoint *mp = get_mountpoint(dentry);
 		if (IS_ERR(mp)) {
 			namespace_unlock();
 			inode_unlock(dentry->d_inode);
@@ -2059,7 +2081,11 @@ retry:
 static void unlock_mount(struct mountpoint *where)
 {
 	struct dentry *dentry = where->m_dentry;
+
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(where);
+	read_sequnlock_excl(&mount_lock);
+
 	namespace_unlock();
 	inode_unlock(dentry->d_inode);
 }
@@ -3135,9 +3161,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	/* A moved mount should not expire automatically */
 	list_del_init(&new_mnt->mnt_expire);
+	put_mountpoint(root_mp);
 	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
-	put_mountpoint(root_mp);
 	error = 0;
 out4:
 	unlock_mount(old_mp);

From 75422726b0f717d67db3283c2eb5bc14fa2619c5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 4 Jan 2017 17:37:27 +1300
Subject: [PATCH 107/258] libfs: Modify mount_pseudo_xattr to be clear it is
 not a userspace mount

Add MS_KERNMOUNT to the flags that are passed.
Use sget_userns and force &init_user_ns instead of calling sget so that
even if called from a weird context the internal filesystem will be
considered to be in the intial user namespace.

Luis Ressel reported that the the failure to pass MS_KERNMOUNT into
mount_pseudo broke his in development graphics driver that uses the
generic drm infrastructure.  I am not certain the deriver was bug
free in it's usage of that infrastructure but since
mount_pseudo_xattr can never be triggered by userspace it is clearer
and less error prone, and less problematic for the code to be explicit.

Reported-by: Luis Ressel <aranea@aixah.de>
Tested-by: Luis Ressel <aranea@aixah.de>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/libfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index e973cd51f126..28d6f35feed6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
 	struct inode *root;
 	struct qstr d_name = QSTR_INIT(name, strlen(name));
 
-	s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
+	s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER,
+			&init_user_ns, NULL);
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 

From add7c65ca426b7a37184dd3d2172394e23d585d6 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@openvz.org>
Date: Wed, 4 Jan 2017 19:28:14 -0800
Subject: [PATCH 108/258] pid: fix lockdep deadlock warning due to ucount_lock

=========================================================
[ INFO: possible irq lock inversion dependency detected ]
4.10.0-rc2-00024-g4aecec9-dirty #118 Tainted: G        W
---------------------------------------------------------
swapper/1/0 just changed the state of lock:
 (&(&sighand->siglock)->rlock){-.....}, at: [<ffffffffbd0a1bc6>] __lock_task_sighand+0xb6/0x2c0
but this lock took another, HARDIRQ-unsafe lock in the past:
 (ucounts_lock){+.+...}
and interrupts could create inverse lock ordering between them.
other info that might help us debug this:
Chain exists of:                 &(&sighand->siglock)->rlock --> &(&tty->ctrl_lock)->rlock --> ucounts_lock
 Possible interrupt unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(ucounts_lock);
                               local_irq_disable();
                               lock(&(&sighand->siglock)->rlock);
                               lock(&(&tty->ctrl_lock)->rlock);
  <Interrupt>
    lock(&(&sighand->siglock)->rlock);

 *** DEADLOCK ***

This patch removes a dependency between rlock and ucount_lock.

Fixes: f333c700c610 ("pidns: Add a limit on the number of pid namespaces")
Cc: stable@vger.kernel.org
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/pid_namespace.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index df9e8e9e0be7..eef2ce968636 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -151,8 +151,12 @@ out:
 
 static void delayed_free_pidns(struct rcu_head *p)
 {
-	kmem_cache_free(pid_ns_cachep,
-			container_of(p, struct pid_namespace, rcu));
+	struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu);
+
+	dec_pid_namespaces(ns->ucounts);
+	put_user_ns(ns->user_ns);
+
+	kmem_cache_free(pid_ns_cachep, ns);
 }
 
 static void destroy_pid_namespace(struct pid_namespace *ns)
@@ -162,8 +166,6 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	ns_free_inum(&ns->ns);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
-	dec_pid_namespaces(ns->ucounts);
-	put_user_ns(ns->user_ns);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 }
 

From 93362fa47fe98b62e4a34ab408c4a418432e7939 Mon Sep 17 00:00:00 2001
From: Zhou Chengming <zhouchengming1@huawei.com>
Date: Fri, 6 Jan 2017 09:32:32 +0800
Subject: [PATCH 109/258] sysctl: Drop reference added by grab_header in
 proc_sys_readdir

Fixes CVE-2016-9191, proc_sys_readdir doesn't drop reference
added by grab_header when return from !dir_emit_dots path.
It can cause any path called unregister_sysctl_table will
wait forever.

The calltrace of CVE-2016-9191:

[ 5535.960522] Call Trace:
[ 5535.963265]  [<ffffffff817cdaaf>] schedule+0x3f/0xa0
[ 5535.968817]  [<ffffffff817d33fb>] schedule_timeout+0x3db/0x6f0
[ 5535.975346]  [<ffffffff817cf055>] ? wait_for_completion+0x45/0x130
[ 5535.982256]  [<ffffffff817cf0d3>] wait_for_completion+0xc3/0x130
[ 5535.988972]  [<ffffffff810d1fd0>] ? wake_up_q+0x80/0x80
[ 5535.994804]  [<ffffffff8130de64>] drop_sysctl_table+0xc4/0xe0
[ 5536.001227]  [<ffffffff8130de17>] drop_sysctl_table+0x77/0xe0
[ 5536.007648]  [<ffffffff8130decd>] unregister_sysctl_table+0x4d/0xa0
[ 5536.014654]  [<ffffffff8130deff>] unregister_sysctl_table+0x7f/0xa0
[ 5536.021657]  [<ffffffff810f57f5>] unregister_sched_domain_sysctl+0x15/0x40
[ 5536.029344]  [<ffffffff810d7704>] partition_sched_domains+0x44/0x450
[ 5536.036447]  [<ffffffff817d0761>] ? __mutex_unlock_slowpath+0x111/0x1f0
[ 5536.043844]  [<ffffffff81167684>] rebuild_sched_domains_locked+0x64/0xb0
[ 5536.051336]  [<ffffffff8116789d>] update_flag+0x11d/0x210
[ 5536.057373]  [<ffffffff817cf61f>] ? mutex_lock_nested+0x2df/0x450
[ 5536.064186]  [<ffffffff81167acb>] ? cpuset_css_offline+0x1b/0x60
[ 5536.070899]  [<ffffffff810fce3d>] ? trace_hardirqs_on+0xd/0x10
[ 5536.077420]  [<ffffffff817cf61f>] ? mutex_lock_nested+0x2df/0x450
[ 5536.084234]  [<ffffffff8115a9f5>] ? css_killed_work_fn+0x25/0x220
[ 5536.091049]  [<ffffffff81167ae5>] cpuset_css_offline+0x35/0x60
[ 5536.097571]  [<ffffffff8115aa2c>] css_killed_work_fn+0x5c/0x220
[ 5536.104207]  [<ffffffff810bc83f>] process_one_work+0x1df/0x710
[ 5536.110736]  [<ffffffff810bc7c0>] ? process_one_work+0x160/0x710
[ 5536.117461]  [<ffffffff810bce9b>] worker_thread+0x12b/0x4a0
[ 5536.123697]  [<ffffffff810bcd70>] ? process_one_work+0x710/0x710
[ 5536.130426]  [<ffffffff810c3f7e>] kthread+0xfe/0x120
[ 5536.135991]  [<ffffffff817d4baf>] ret_from_fork+0x1f/0x40
[ 5536.142041]  [<ffffffff810c3e80>] ? kthread_create_on_node+0x230/0x230

One cgroup maintainer mentioned that "cgroup is trying to offline
a cpuset css, which takes place under cgroup_mutex.  The offlining
ends up trying to drain active usages of a sysctl table which apprently
is not happening."
The real reason is that proc_sys_readdir doesn't drop reference added
by grab_header when return from !dir_emit_dots path. So this cpuset
offline path will wait here forever.

See here for details: http://www.openwall.com/lists/oss-security/2016/11/04/13

Fixes: f0c3b5093add ("[readdir] convert procfs")
Cc: stable@vger.kernel.org
Reported-by: CAI Qian <caiqian@redhat.com>
Tested-by: Yang Shukui <yangshukui@huawei.com>
Signed-off-by: Zhou Chengming <zhouchengming1@huawei.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/proc_sysctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 55313d994895..d4e37acd4821 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 	ctl_dir = container_of(head, struct ctl_dir, header);
 
 	if (!dir_emit_dots(file, ctx))
-		return 0;
+		goto out;
 
 	pos = 2;
 
@@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 			break;
 		}
 	}
+out:
 	sysctl_head_finish(head);
 	return 0;
 }

From 21d25f6a4217e755906cb548b55ddab39d0e88b9 Mon Sep 17 00:00:00 2001
From: Krister Johansen <kjlx@templeofstupid.com>
Date: Wed, 4 Jan 2017 01:22:52 -0800
Subject: [PATCH 110/258] dmaengine: iota: ioat_alloc_chan_resources should not
 perform sleeping allocations.

On a kernel with DEBUG_LOCKS, ioat_free_chan_resources triggers an
in_interrupt() warning.  With PROVE_LOCKING, it reports detecting a
SOFTIRQ-safe to SOFTIRQ-unsafe lock ordering in the same code path.

This is because dma_generic_alloc_coherent() checks if the GFP flags
permit blocking.  It allocates from different subsystems if blocking is
permitted.  The free path knows how to return the memory to the correct
allocator.  If GFP_KERNEL is specified then the alloc and free end up
going through cma_alloc(), which uses mutexes.

Given that ioat_free_chan_resources() can be called in interrupt
context, ioat_alloc_chan_resources() must specify GFP_NOWAIT so that the
allocations do not block and instead use an allocator that uses
spinlocks.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ioat/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index ace5cb2cb12f..cc5259b881d4 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -700,7 +700,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
 	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
 	ioat_chan->completion =
 		dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool,
-				GFP_KERNEL, &ioat_chan->completion_dma);
+				GFP_NOWAIT, &ioat_chan->completion_dma);
 	if (!ioat_chan->completion)
 		return -ENOMEM;
 
@@ -710,7 +710,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
 	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
 
 	order = IOAT_MAX_ORDER;
-	ring = ioat_alloc_ring(c, order, GFP_KERNEL);
+	ring = ioat_alloc_ring(c, order, GFP_NOWAIT);
 	if (!ring)
 		return -ENOMEM;
 

From 527a27591312e4b3a0f8179f321f9e85c0850df0 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 9 Jan 2017 16:50:52 +0200
Subject: [PATCH 111/258] dmaengine: omap-dma: Fix the port_window support

We do not yet have users of port_window. The following errors were found
when converting the tusb6010_omap.c musb driver:

- The peripheral side must have SRC_/DST_PACKED disabled
- when configuring the burst for the peripheral side the memory side
  configuration were overwritten: d->csdp = ... -> d->csdp |= ...
- The EI and FI were configured for the wrong sides of the transfers.

With these changes and the converted tus6010_omap.c I was able to verify
that things are working as they expected to work.

Fixes: 201ac4861c19 ("dmaengine: omap-dma: Support for slave devices with data port window")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/omap-dma.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 4ad101a47e0a..daf479cce691 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -938,6 +938,23 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 		d->ccr |= CCR_DST_AMODE_POSTINC;
 		if (port_window) {
 			d->ccr |= CCR_SRC_AMODE_DBLIDX;
+
+			if (port_window_bytes >= 64)
+				d->csdp |= CSDP_SRC_BURST_64;
+			else if (port_window_bytes >= 32)
+				d->csdp |= CSDP_SRC_BURST_32;
+			else if (port_window_bytes >= 16)
+				d->csdp |= CSDP_SRC_BURST_16;
+
+		} else {
+			d->ccr |= CCR_SRC_AMODE_CONSTANT;
+		}
+	} else {
+		d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
+
+		d->ccr |= CCR_SRC_AMODE_POSTINC;
+		if (port_window) {
+			d->ccr |= CCR_DST_AMODE_DBLIDX;
 			d->ei = 1;
 			/*
 			 * One frame covers the port_window and by  configure
@@ -948,27 +965,11 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 			d->fi = -(port_window_bytes - 1);
 
 			if (port_window_bytes >= 64)
-				d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
+				d->csdp |= CSDP_DST_BURST_64;
 			else if (port_window_bytes >= 32)
-				d->csdp = CSDP_SRC_BURST_32 | CSDP_SRC_PACKED;
+				d->csdp |= CSDP_DST_BURST_32;
 			else if (port_window_bytes >= 16)
-				d->csdp = CSDP_SRC_BURST_16 | CSDP_SRC_PACKED;
-		} else {
-			d->ccr |= CCR_SRC_AMODE_CONSTANT;
-		}
-	} else {
-		d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
-
-		d->ccr |= CCR_SRC_AMODE_POSTINC;
-		if (port_window) {
-			d->ccr |= CCR_DST_AMODE_DBLIDX;
-
-			if (port_window_bytes >= 64)
-				d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED;
-			else if (port_window_bytes >= 32)
-				d->csdp = CSDP_DST_BURST_32 | CSDP_DST_PACKED;
-			else if (port_window_bytes >= 16)
-				d->csdp = CSDP_DST_BURST_16 | CSDP_DST_PACKED;
+				d->csdp |= CSDP_DST_BURST_16;
 		} else {
 			d->ccr |= CCR_DST_AMODE_CONSTANT;
 		}
@@ -1017,7 +1018,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 		osg->addr = sg_dma_address(sgent);
 		osg->en = en;
 		osg->fn = sg_dma_len(sgent) / frame_bytes;
-		if (port_window && dir == DMA_MEM_TO_DEV) {
+		if (port_window && dir == DMA_DEV_TO_MEM) {
 			osg->ei = 1;
 			/*
 			 * One frame covers the port_window and by  configure

From 497de07d89c1410d76a15bec2bb41f24a2a89f31 Mon Sep 17 00:00:00 2001
From: Gu Zheng <guzheng1@huawei.com>
Date: Mon, 9 Jan 2017 09:34:48 +0800
Subject: [PATCH 112/258] tmpfs: clear S_ISGID when setting posix ACLs

This change was missed the tmpfs modification in In CVE-2016-7097
commit 073931017b49 ("posix_acl: Clear SGID bit when setting
file permissions")
It can test by xfstest generic/375, which failed to clear
setgid bit in the following test case on tmpfs:

  touch $testfile
  chown 100:100 $testfile
  chmod 2755 $testfile
  _runas -u 100 -g 101 -- setfacl -m u::rwx,g::rwx,o::rwx $testfile

Signed-off-by: Gu Zheng <guzheng1@huawei.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/posix_acl.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 595522022aca..c9d48dc78495 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -922,11 +922,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	int error;
 
 	if (type == ACL_TYPE_ACCESS) {
-		error = posix_acl_equiv_mode(acl, &inode->i_mode);
-		if (error < 0)
-			return 0;
-		if (error == 0)
-			acl = NULL;
+		error = posix_acl_update_mode(inode,
+				&inode->i_mode, &acl);
+		if (error)
+			return error;
 	}
 
 	inode->i_ctime = current_time(inode);

From 2e40795c3bf344cfb5220d94566205796e3ef19a Mon Sep 17 00:00:00 2001
From: Dennis Kadioglu <denk@post.com>
Date: Mon, 9 Jan 2017 17:10:46 +0100
Subject: [PATCH 113/258] ALSA: usb-audio: Add a quirk for Plantronics BT600

Plantronics BT600 does not support reading the sample rate which leads
to many lines of "cannot get freq at ep 0x1" and "cannot get freq at
ep 0x82". This patch adds the USB ID of the BT600 to quirks.c and
avoids those error messages.

Signed-off-by: Dennis Kadioglu <denk@post.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index b3fd2382fdd9..eb4b9f7a571e 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1135,6 +1135,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
 	case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
 	case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
+	case USB_ID(0x047F, 0x02F7): /* Plantronics BT-600 */
 	case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */

From 19a91dd4e39e755d650444da7f3a571b40a11093 Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt <xypron.glpk@gmx.de>
Date: Fri, 23 Dec 2016 16:01:08 +0100
Subject: [PATCH 114/258] MMC: meson: avoid possible NULL dereference

No actual segmentation faults were observed but the coding is
at least inconsistent.

irqreturn_t meson_mmc_irq():

We should not dereference host before checking it.

meson_mmc_irq_thread():

If cmd or mrq are NULL we should not dereference them after
writing a warning.

Fixes: 51c5d8447bd7 MMC: meson: initial support for GX platforms
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-gx-mmc.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index b352760c041e..09739352834c 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -578,13 +578,15 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 {
 	struct meson_host *host = dev_id;
 	struct mmc_request *mrq;
-	struct mmc_command *cmd = host->cmd;
+	struct mmc_command *cmd;
 	u32 irq_en, status, raw_status;
 	irqreturn_t ret = IRQ_HANDLED;
 
 	if (WARN_ON(!host))
 		return IRQ_NONE;
 
+	cmd = host->cmd;
+
 	mrq = host->mrq;
 
 	if (WARN_ON(!mrq))
@@ -670,10 +672,10 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
 	int ret = IRQ_HANDLED;
 
 	if (WARN_ON(!mrq))
-		ret = IRQ_NONE;
+		return IRQ_NONE;
 
 	if (WARN_ON(!cmd))
-		ret = IRQ_NONE;
+		return IRQ_NONE;
 
 	data = cmd->data;
 	if (data) {

From 146cc8a17a3b4996f6805ee5c080e7101277c410 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 10 Jan 2017 12:05:37 +0100
Subject: [PATCH 115/258] USB: serial: kl5kusb105: fix line-state error
 handling

The current implementation failed to detect short transfers when
attempting to read the line state, and also, to make things worse,
logged the content of the uninitialised heap transfer buffer.

Fixes: abf492e7b3ae ("USB: kl5kusb105: fix DMA buffers on stack")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/kl5kusb105.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index 0ee190fc1bf8..6cb45757818f 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -192,10 +192,11 @@ static int klsi_105_get_line_state(struct usb_serial_port *port,
 			     status_buf, KLSI_STATUSBUF_LEN,
 			     10000
 			     );
-	if (rc < 0)
-		dev_err(&port->dev, "Reading line status failed (error = %d)\n",
-			rc);
-	else {
+	if (rc != KLSI_STATUSBUF_LEN) {
+		dev_err(&port->dev, "reading line status failed: %d\n", rc);
+		if (rc >= 0)
+			rc = -EIO;
+	} else {
 		status = get_unaligned_le16(status_buf);
 
 		dev_info(&port->serial->dev->dev, "read status %x %x\n",

From 620f1a632ebcc9811c2f8009ba52297c7006f805 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 13 Dec 2016 18:50:13 -0800
Subject: [PATCH 116/258] wusbcore: Fix one more crypto-on-the-stack bug

The driver put a constant buffer of all zeros on the stack and
pointed a scatterlist entry at it.  This doesn't work with virtual
stacks.  Use ZERO_PAGE instead.

Cc: stable@vger.kernel.org # 4.9 only
Reported-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/wusbcore/crypto.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index 79451f7ef1b7..062c205f0046 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -216,7 +216,6 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
 	struct scatterlist sg[4], sg_dst;
 	void *dst_buf;
 	size_t dst_size;
-	const u8 bzero[16] = { 0 };
 	u8 iv[crypto_skcipher_ivsize(tfm_cbc)];
 	size_t zero_padding;
 
@@ -261,7 +260,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
 	sg_set_buf(&sg[1], &scratch->b1, sizeof(scratch->b1));
 	sg_set_buf(&sg[2], b, blen);
 	/* 0 if well behaved :) */
-	sg_set_buf(&sg[3], bzero, zero_padding);
+	sg_set_page(&sg[3], ZERO_PAGE(0), zero_padding, 0);
 	sg_init_one(&sg_dst, dst_buf, dst_size);
 
 	skcipher_request_set_tfm(req, tfm_cbc);

From e864212078ded276bdb272b2e0ee6a979357ca8a Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:53 +0100
Subject: [PATCH 117/258] target: add XCOPY target/segment desc sense codes

As defined in http://www.t10.org/lists/asc-num.htm. To be used during
validation of XCOPY target and segment descriptor lists.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_transport.c | 24 ++++++++++++++++++++++++
 include/target/target_core_base.h      |  4 ++++
 2 files changed, 28 insertions(+)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 7dfefd66df93..1cadc9eefa21 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1693,6 +1693,10 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
 	case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
 	case TCM_COPY_TARGET_DEVICE_NOT_REACHABLE:
+	case TCM_TOO_MANY_TARGET_DESCS:
+	case TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE:
+	case TCM_TOO_MANY_SEGMENT_DESCS:
+	case TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE:
 		break;
 	case TCM_OUT_OF_RESOURCES:
 		sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -2808,6 +2812,26 @@ static const struct sense_info sense_info_table[] = {
 		.key = ILLEGAL_REQUEST,
 		.asc = 0x26, /* INVALID FIELD IN PARAMETER LIST */
 	},
+	[TCM_TOO_MANY_TARGET_DESCS] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x06, /* TOO MANY TARGET DESCRIPTORS */
+	},
+	[TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x07, /* UNSUPPORTED TARGET DESCRIPTOR TYPE CODE */
+	},
+	[TCM_TOO_MANY_SEGMENT_DESCS] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x08, /* TOO MANY SEGMENT DESCRIPTORS */
+	},
+	[TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x09, /* UNSUPPORTED SEGMENT DESCRIPTOR TYPE CODE */
+	},
 	[TCM_PARAMETER_LIST_LENGTH_ERROR] = {
 		.key = ILLEGAL_REQUEST,
 		.asc = 0x1a, /* PARAMETER LIST LENGTH ERROR */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 29e6858bb164..43edf82e54ff 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -174,6 +174,10 @@ enum tcm_sense_reason_table {
 	TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED	= R(0x16),
 	TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED	= R(0x17),
 	TCM_COPY_TARGET_DEVICE_NOT_REACHABLE	= R(0x18),
+	TCM_TOO_MANY_TARGET_DESCS		= R(0x19),
+	TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE	= R(0x1a),
+	TCM_TOO_MANY_SEGMENT_DESCS		= R(0x1b),
+	TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE	= R(0x1c),
 #undef R
 };
 

From 61c359194c46cbffec9a1f2c59c1c4011222ad84 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:54 +0100
Subject: [PATCH 118/258] target: use XCOPY TOO MANY TARGET DESCRIPTORS sense

spc4r37 6.4.3.4 states:
  If the number of CSCD descriptors exceeds the allowed number, the copy
  manager shall terminate the command with CHECK CONDITION status, with
  the sense key set to ILLEGAL REQUEST, and the additional sense code
  set to TOO MANY TARGET DESCRIPTORS.

LIO currently responds with INVALID FIELD IN PARAMETER LIST, which sees
it fail the libiscsi ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 37d5caebffa6..db265ad10fa4 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -201,9 +201,11 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 			" multiple of %d\n", XCOPY_TARGET_DESC_LEN);
 		return -EINVAL;
 	}
-	if (tdll > 64) {
+	if (tdll > RCR_OP_MAX_TARGET_DESC_COUNT * XCOPY_TARGET_DESC_LEN) {
 		pr_err("XCOPY target descriptor supports a maximum"
 			" two src/dest descriptors, tdll: %hu too large..\n", tdll);
+		/* spc4r37 6.4.3.4 CSCD DESCRIPTOR LIST LENGTH field */
+		*sense_ret = TCM_TOO_MANY_TARGET_DESCS;
 		return -EINVAL;
 	}
 	/*

From af9f62c1686268c0517b289274d38f3a03bebd2a Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:55 +0100
Subject: [PATCH 119/258] target: bounds check XCOPY segment descriptor list

Check the length of the XCOPY request segment descriptor list against
the value advertised via the MAXIMUM SEGMENT DESCRIPTOR COUNT field in
the RECEIVE COPY OPERATING PARAMETERS response.

spc4r37 6.4.3.5 states:
  If the number of segment descriptors exceeds the allowed number, the
  copy manager shall terminate the command with CHECK CONDITION status,
  with the sense key set to ILLEGAL REQUEST, and the additional sense
  code set to TOO MANY SEGMENT DESCRIPTORS.

This functionality is testable using the libiscsi
ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index db265ad10fa4..da0f2da732e7 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -308,17 +308,26 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op
 
 static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
 				struct xcopy_op *xop, unsigned char *p,
-				unsigned int sdll)
+				unsigned int sdll, sense_reason_t *sense_ret)
 {
 	unsigned char *desc = p;
 	unsigned int start = 0;
 	int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0;
 
+	*sense_ret = TCM_INVALID_PARAMETER_LIST;
+
 	if (offset != 0) {
 		pr_err("XCOPY segment descriptor list length is not"
 			" multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
 		return -EINVAL;
 	}
+	if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) {
+		pr_err("XCOPY supports %u segment descriptor(s), sdll: %u too"
+			" large..\n", RCR_OP_MAX_SG_DESC_COUNT, sdll);
+		/* spc4r37 6.4.3.5 SEGMENT DESCRIPTOR LIST LENGTH field */
+		*sense_ret = TCM_TOO_MANY_SEGMENT_DESCS;
+		return -EINVAL;
+	}
 
 	while (start < sdll) {
 		/*
@@ -916,7 +925,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 	seg_desc = &p[16];
 	seg_desc += (rc * XCOPY_TARGET_DESC_LEN);
 
-	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, sdll);
+	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc,
+						    sdll, &ret);
 	if (rc <= 0) {
 		xcopy_pt_undepend_remotedev(xop);
 		goto out;

From 7d38706669ce00603b187f667a4eb67c94eac098 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:56 +0100
Subject: [PATCH 120/258] target: bounds check XCOPY total descriptor list
 length

spc4r37 6.4.3.5 states:
  If the combined length of the CSCD descriptors and segment descriptors
  exceeds the allowed value, then the copy manager shall terminate the
  command with CHECK CONDITION status, with the sense key set to ILLEGAL
  REQUEST, and the additional sense code set to PARAMETER LIST LENGTH
  ERROR.

This functionality can be tested using the libiscsi
ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index da0f2da732e7..0d10fcf438d1 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -894,6 +894,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 	 */
 	tdll = get_unaligned_be16(&p[2]);
 	sdll = get_unaligned_be32(&p[8]);
+	if (tdll + sdll > RCR_OP_MAX_DESC_LIST_LEN) {
+		pr_err("XCOPY descriptor list length %u exceeds maximum %u\n",
+		       tdll + sdll, RCR_OP_MAX_DESC_LIST_LEN);
+		ret = TCM_PARAMETER_LIST_LENGTH_ERROR;
+		goto out;
+	}
 
 	inline_dl = get_unaligned_be32(&p[12]);
 	if (inline_dl != 0) {

From c243849720ac237e9e7191fe57f619bb3a871d4c Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:04 +0100
Subject: [PATCH 121/258] target: return UNSUPPORTED TARGET/SEGMENT DESC TYPE
 CODE sense

Use UNSUPPORTED TARGET DESCRIPTOR TYPE CODE and UNSUPPORTED SEGMENT
DESCRIPTOR TYPE CODE additional sense codes if a descriptor type in an
XCOPY request is not supported, as specified in spc4r37 6.4.5 and 6.4.6.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 0d10fcf438d1..52738a108e5f 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -199,6 +199,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 	if (offset != 0) {
 		pr_err("XCOPY target descriptor list length is not"
 			" multiple of %d\n", XCOPY_TARGET_DESC_LEN);
+		*sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE;
 		return -EINVAL;
 	}
 	if (tdll > RCR_OP_MAX_TARGET_DESC_COUNT * XCOPY_TARGET_DESC_LEN) {
@@ -240,6 +241,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 		default:
 			pr_err("XCOPY unsupported descriptor type code:"
 					" 0x%02x\n", desc[0]);
+			*sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE;
 			goto out;
 		}
 	}
@@ -319,6 +321,7 @@ static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
 	if (offset != 0) {
 		pr_err("XCOPY segment descriptor list length is not"
 			" multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
+		*sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE;
 		return -EINVAL;
 	}
 	if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) {
@@ -346,6 +349,7 @@ static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
 		default:
 			pr_err("XCOPY unsupported segment descriptor"
 				"type: 0x%02x\n", desc[0]);
+			*sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE;
 			goto out;
 		}
 	}

From 94aae4caacda89a1bdb7198b260f4ca3595b7ed7 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:05 +0100
Subject: [PATCH 122/258] target: simplify XCOPY wwn->se_dev lookup helper

target_xcopy_locate_se_dev_e4() is used to locate an se_dev, based on
the WWN provided with the XCOPY request. Remove a couple of unneeded
arguments, and rely on the caller for the src/dst test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 52738a108e5f..155db18ee4e7 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -53,18 +53,13 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 	return 0;
 }
 
-static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
-					bool src)
+static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
+					struct se_device **found_dev)
 {
 	struct se_device *se_dev;
-	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
+	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
 	int rc;
 
-	if (src)
-		dev_wwn = &xop->dst_tid_wwn[0];
-	else
-		dev_wwn = &xop->src_tid_wwn[0];
-
 	mutex_lock(&g_device_mutex);
 	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
 
@@ -78,15 +73,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
 		if (rc != 0)
 			continue;
 
-		if (src) {
-			xop->dst_dev = se_dev;
-			pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located"
-				" se_dev\n", xop->dst_dev);
-		} else {
-			xop->src_dev = se_dev;
-			pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from located"
-				" se_dev\n", xop->src_dev);
-		}
+		*found_dev = se_dev;
+		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
 
 		rc = target_depend_item(&se_dev->dev_group.cg_item);
 		if (rc != 0) {
@@ -247,9 +235,11 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 	}
 
 	if (xop->op_origin == XCOL_SOURCE_RECV_OP)
-		rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true);
+		rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn,
+						&xop->dst_dev);
 	else
-		rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false);
+		rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn,
+						&xop->src_dev);
 	/*
 	 * If a matching IEEE NAA 0x83 descriptor for the requested device
 	 * is not located on this node, return COPY_ABORTED with ASQ/ASQC

From f184210bca6c9d0091ff5e5629dea4cbb8a17c0f Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:06 +0100
Subject: [PATCH 123/258] target: check XCOPY segment descriptor CSCD IDs

Ensure that the segment descriptor CSCD descriptor ID values correspond
to CSCD descriptor entries located in the XCOPY command parameter list.
SPC4r37 6.4.6.1 Table 150 specifies this range as 0000h to 07FFh, where
the CSCD descriptor location in the parameter list can be located via:
16 + (id * 32)

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[ bvanassche: inserted "; " in the format string of an error message
  and also moved a "||" operator from the start of a line to the end
  of the previous line ]
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 8 ++++++++
 drivers/target/target_core_xcopy.h | 6 ++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 155db18ee4e7..41a2a8ad1046 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -278,6 +278,14 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op
 
 	xop->stdi = get_unaligned_be16(&desc[4]);
 	xop->dtdi = get_unaligned_be16(&desc[6]);
+
+	if (xop->stdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX ||
+	    xop->dtdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX) {
+		pr_err("XCOPY segment desc 0x02: unsupported CSCD ID > 0x%x; stdi: %hu dtdi: %hu\n",
+			XCOPY_CSCD_DESC_ID_LIST_OFF_MAX, xop->stdi, xop->dtdi);
+		return -EINVAL;
+	}
+
 	pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: %d\n",
 		desc_len, xop->stdi, xop->dtdi, dc);
 
diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h
index 4d3d4dd060f2..e2d141140342 100644
--- a/drivers/target/target_core_xcopy.h
+++ b/drivers/target/target_core_xcopy.h
@@ -5,6 +5,12 @@
 #define XCOPY_NAA_IEEE_REGEX_LEN	16
 #define XCOPY_MAX_SECTORS		1024
 
+/*
+ * SPC4r37 6.4.6.1
+ * Table 150 — CSCD descriptor ID values
+ */
+#define XCOPY_CSCD_DESC_ID_LIST_OFF_MAX	0x07FF
+
 enum xcopy_origin_list {
 	XCOL_SOURCE_RECV_OP = 0x01,
 	XCOL_DEST_RECV_OP = 0x02,

From 66640d35c1e4ef3c96ba5edb3c5e2ff8ab812e7a Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:07 +0100
Subject: [PATCH 124/258] target: use XCOPY segment descriptor CSCD IDs

The XCOPY specification in SPC4r37 states that the XCOPY source and
destination device(s) should be derived from the copy source and copy
destination (CSCD) descriptor IDs in the XCOPY segment descriptor.

The CSCD IDs are generally (for block -> block copies), indexes into
the corresponding CSCD descriptor list, e.g.
=================================
EXTENDED COPY Header
=================================
CSCD Descriptor List
- entry 0
  + LU ID <--------------<------------------\
- entry 1                                   |
  + LU ID <______________<_____________     |
=================================      |    |
Segment Descriptor List                |    |
- segment 0                            |    |
  + src CSCD ID = 0 --------->---------+----/
  + dest CSCD ID = 1 ___________>______|
  + len
  + src lba
  + dest lba
=================================

Currently LIO completely ignores the src and dest CSCD IDs in the
Segment Descriptor List, and instead assumes that the first entry in the
CSCD list corresponds to the source, and the second to the destination.

This commit removes this assumption, by ensuring that the Segment
Descriptor List is parsed prior to processing the CSCD Descriptor List.
CSCD Descriptor List processing is modified to compare the current list
index with the previously obtained src and dest CSCD IDs.

Additionally, XCOPY requests where the src and dest CSCD IDs refer to
the CSCD Descriptor List entry can now be successfully processed.

Fixes: cbf031f ("target: Add support for EXTENDED_COPY copy offload")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=191381
Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 79 ++++++++++++++++++------------
 1 file changed, 48 insertions(+), 31 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 41a2a8ad1046..2595c1eb9e91 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -98,7 +98,7 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
 }
 
 static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
-				unsigned char *p, bool src)
+				unsigned char *p, unsigned short cscd_index)
 {
 	unsigned char *desc = p;
 	unsigned short ript;
@@ -143,7 +143,13 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op
 		return -EINVAL;
 	}
 
-	if (src) {
+	if (cscd_index != xop->stdi && cscd_index != xop->dtdi) {
+		pr_debug("XCOPY 0xe4: ignoring CSCD entry %d - neither src nor "
+			 "dest\n", cscd_index);
+		return 0;
+	}
+
+	if (cscd_index == xop->stdi) {
 		memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
 		/*
 		 * Determine if the source designator matches the local device
@@ -155,10 +161,15 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op
 			pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source"
 					" received xop\n", xop->src_dev);
 		}
-	} else {
+	}
+
+	if (cscd_index == xop->dtdi) {
 		memcpy(&xop->dst_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
 		/*
-		 * Determine if the destination designator matches the local device
+		 * Determine if the destination designator matches the local
+		 * device. If @cscd_index corresponds to both source (stdi) and
+		 * destination (dtdi), or dtdi comes after stdi, then
+		 * XCOL_DEST_RECV_OP wins.
 		 */
 		if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0],
 				XCOPY_NAA_IEEE_REGEX_LEN)) {
@@ -178,9 +189,9 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 {
 	struct se_device *local_dev = se_cmd->se_dev;
 	unsigned char *desc = p;
-	int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0;
+	int offset = tdll % XCOPY_TARGET_DESC_LEN, rc;
+	unsigned short cscd_index = 0;
 	unsigned short start = 0;
-	bool src = true;
 
 	*sense_ret = TCM_INVALID_PARAMETER_LIST;
 
@@ -206,25 +217,19 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 
 	while (start < tdll) {
 		/*
-		 * Check target descriptor identification with 0xE4 type with
-		 * use VPD 0x83 WWPN matching ..
+		 * Check target descriptor identification with 0xE4 type, and
+		 * compare the current index with the CSCD descriptor IDs in
+		 * the segment descriptor. Use VPD 0x83 WWPN matching ..
 		 */
 		switch (desc[0]) {
 		case 0xe4:
 			rc = target_xcopy_parse_tiddesc_e4(se_cmd, xop,
-							&desc[0], src);
+							&desc[0], cscd_index);
 			if (rc != 0)
 				goto out;
-			/*
-			 * Assume target descriptors are in source -> destination order..
-			 */
-			if (src)
-				src = false;
-			else
-				src = true;
 			start += XCOPY_TARGET_DESC_LEN;
 			desc += XCOPY_TARGET_DESC_LEN;
-			ret++;
+			cscd_index++;
 			break;
 		default:
 			pr_err("XCOPY unsupported descriptor type code:"
@@ -234,12 +239,21 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 		}
 	}
 
-	if (xop->op_origin == XCOL_SOURCE_RECV_OP)
+	switch (xop->op_origin) {
+	case XCOL_SOURCE_RECV_OP:
 		rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn,
 						&xop->dst_dev);
-	else
+		break;
+	case XCOL_DEST_RECV_OP:
 		rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn,
 						&xop->src_dev);
+		break;
+	default:
+		pr_err("XCOPY CSCD descriptor IDs not found in CSCD list - "
+			"stdi: %hu dtdi: %hu\n", xop->stdi, xop->dtdi);
+		rc = -EINVAL;
+		break;
+	}
 	/*
 	 * If a matching IEEE NAA 0x83 descriptor for the requested device
 	 * is not located on this node, return COPY_ABORTED with ASQ/ASQC
@@ -256,7 +270,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 	pr_debug("XCOPY TGT desc: Dest dev: %p NAA IEEE WWN: 0x%16phN\n",
 		 xop->dst_dev, &xop->dst_tid_wwn[0]);
 
-	return ret;
+	return cscd_index;
 
 out:
 	return -EINVAL;
@@ -913,6 +927,20 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		" tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage,
 		tdll, sdll, inline_dl);
 
+	/*
+	 * skip over the target descriptors until segment descriptors
+	 * have been passed - CSCD ids are needed to determine src and dest.
+	 */
+	seg_desc = &p[16] + tdll;
+
+	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc,
+						    sdll, &ret);
+	if (rc <= 0)
+		goto out;
+
+	pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc,
+				rc * XCOPY_SEGMENT_DESC_LEN);
+
 	rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll, &ret);
 	if (rc <= 0)
 		goto out;
@@ -930,19 +958,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 
 	pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc,
 				rc * XCOPY_TARGET_DESC_LEN);
-	seg_desc = &p[16];
-	seg_desc += (rc * XCOPY_TARGET_DESC_LEN);
-
-	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc,
-						    sdll, &ret);
-	if (rc <= 0) {
-		xcopy_pt_undepend_remotedev(xop);
-		goto out;
-	}
 	transport_kunmap_data_sg(se_cmd);
 
-	pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc,
-				rc * XCOPY_SEGMENT_DESC_LEN);
 	INIT_WORK(&xop->xop_work, target_xcopy_do_work);
 	queue_work(xcopy_wq, &xop->xop_work);
 	return TCM_NO_SENSE;

From f94fd098f674b78c29f482da1999d8de0c93c74e Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:08 +0100
Subject: [PATCH 125/258] target: check for XCOPY parameter truncation

Check for XCOPY header, CSCD descriptor and segment descriptor list
truncation, and respond accordingly.

SPC4r37 6.4.1 EXTENDED COPY(LID4) states (also applying to LID1 reqs):
  If the parameter list length causes truncation of the parameter list,
  then the copy manager shall transfer no data and shall terminate the
  EXTENDED COPY command with CHECK CONDITION status, with the sense key
  set to ILLEGAL REQUEST, and the additional sense code set to PARAMETER
  LIST LENGTH ERROR.

This behaviour can be tested using the libiscsi ExtendedCopy.ParamHdr
test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 14 ++++++++++++++
 drivers/target/target_core_xcopy.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 2595c1eb9e91..a9a6462c66d1 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -888,6 +888,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		return TCM_UNSUPPORTED_SCSI_OPCODE;
 	}
 
+	if (se_cmd->data_length < XCOPY_HDR_LEN) {
+		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
+				se_cmd->data_length, XCOPY_HDR_LEN);
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
+	}
+
 	xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
 	if (!xop) {
 		pr_err("Unable to allocate xcopy_op\n");
@@ -923,6 +929,14 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		goto out;
 	}
 
+	if (se_cmd->data_length < (XCOPY_HDR_LEN + tdll + sdll + inline_dl)) {
+		pr_err("XCOPY parameter truncation: data length %u too small "
+			"for tdll: %hu sdll: %u inline_dl: %u\n",
+			se_cmd->data_length, tdll, sdll, inline_dl);
+		ret = TCM_PARAMETER_LIST_LENGTH_ERROR;
+		goto out;
+	}
+
 	pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x"
 		" tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage,
 		tdll, sdll, inline_dl);
diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h
index e2d141140342..7c0b105cbe1b 100644
--- a/drivers/target/target_core_xcopy.h
+++ b/drivers/target/target_core_xcopy.h
@@ -1,5 +1,6 @@
 #include <target/target_core_base.h>
 
+#define XCOPY_HDR_LEN			16
 #define XCOPY_TARGET_DESC_LEN		32
 #define XCOPY_SEGMENT_DESC_LEN		28
 #define XCOPY_NAA_IEEE_REGEX_LEN	16

From 87156518da94a696f2b27ab8945d531af2f1d339 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:09 +0100
Subject: [PATCH 126/258] target: support XCOPY requests without parameters

SPC4r37 6.4.1 EXTENDED COPY(LID4) states (also applying to LID1 reqs):
  A parameter list length of zero specifies that the copy manager shall
  not transfer any data or alter any internal state, and this shall not
  be considered an error.

This behaviour can be tested using the libiscsi ExtendedCopy.ParamHdr
test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index a9a6462c66d1..d828b3b5000b 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -888,6 +888,10 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		return TCM_UNSUPPORTED_SCSI_OPCODE;
 	}
 
+	if (se_cmd->data_length == 0) {
+		target_complete_cmd(se_cmd, SAM_STAT_GOOD);
+		return TCM_NO_SENSE;
+	}
 	if (se_cmd->data_length < XCOPY_HDR_LEN) {
 		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
 				se_cmd->data_length, XCOPY_HDR_LEN);

From 7b6c1b4c0e1e44544aa18161dba6a741c080a7ef Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Tue, 10 Jan 2017 10:46:00 -0600
Subject: [PATCH 127/258] usb: musb: fix runtime PM in debugfs

MUSB driver now has runtime PM support, but the debugfs driver misses
the PM _get/_put() calls, which could cause MUSB register access
failure.

Cc: stable@vger.kernel.org # 4.9+
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_debugfs.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c
index 4fef50e5c8c1..dd70c88419d2 100644
--- a/drivers/usb/musb/musb_debugfs.c
+++ b/drivers/usb/musb/musb_debugfs.c
@@ -114,6 +114,7 @@ static int musb_regdump_show(struct seq_file *s, void *unused)
 	unsigned		i;
 
 	seq_printf(s, "MUSB (M)HDRC Register Dump\n");
+	pm_runtime_get_sync(musb->controller);
 
 	for (i = 0; i < ARRAY_SIZE(musb_regmap); i++) {
 		switch (musb_regmap[i].size) {
@@ -132,6 +133,8 @@ static int musb_regdump_show(struct seq_file *s, void *unused)
 		}
 	}
 
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 	return 0;
 }
 
@@ -145,7 +148,10 @@ static int musb_test_mode_show(struct seq_file *s, void *unused)
 	struct musb		*musb = s->private;
 	unsigned		test;
 
+	pm_runtime_get_sync(musb->controller);
 	test = musb_readb(musb->mregs, MUSB_TESTMODE);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	if (test & MUSB_TEST_FORCE_HOST)
 		seq_printf(s, "force host\n");
@@ -194,11 +200,12 @@ static ssize_t musb_test_mode_write(struct file *file,
 	u8			test;
 	char			buf[18];
 
+	pm_runtime_get_sync(musb->controller);
 	test = musb_readb(musb->mregs, MUSB_TESTMODE);
 	if (test) {
 		dev_err(musb->controller, "Error: test mode is already set. "
 			"Please do USB Bus Reset to start a new test.\n");
-		return count;
+		goto ret;
 	}
 
 	memset(buf, 0x00, sizeof(buf));
@@ -234,6 +241,9 @@ static ssize_t musb_test_mode_write(struct file *file,
 
 	musb_writeb(musb->mregs, MUSB_TESTMODE, test);
 
+ret:
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 	return count;
 }
 
@@ -254,8 +264,13 @@ static int musb_softconnect_show(struct seq_file *s, void *unused)
 	switch (musb->xceiv->otg->state) {
 	case OTG_STATE_A_HOST:
 	case OTG_STATE_A_WAIT_BCON:
+		pm_runtime_get_sync(musb->controller);
+
 		reg = musb_readb(musb->mregs, MUSB_DEVCTL);
 		connect = reg & MUSB_DEVCTL_SESSION ? 1 : 0;
+
+		pm_runtime_mark_last_busy(musb->controller);
+		pm_runtime_put_autosuspend(musb->controller);
 		break;
 	default:
 		connect = -1;
@@ -284,6 +299,7 @@ static ssize_t musb_softconnect_write(struct file *file,
 	if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
 		return -EFAULT;
 
+	pm_runtime_get_sync(musb->controller);
 	if (!strncmp(buf, "0", 1)) {
 		switch (musb->xceiv->otg->state) {
 		case OTG_STATE_A_HOST:
@@ -314,6 +330,8 @@ static ssize_t musb_softconnect_write(struct file *file,
 		}
 	}
 
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 	return count;
 }
 

From 7c9d8d0c41b3e24473ac7648a7fc2d644ccf08ff Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bgly@us.ibm.com>
Date: Mon, 9 Jan 2017 10:21:20 -0600
Subject: [PATCH 128/258] ibmvscsis: Fix srp_transfer_data fail return code

If srp_transfer_data fails within ibmvscsis_write_pending, then
the most likely scenario is that the client timed out the op and
removed the TCE mapping. Thus it will loop forever retrying the
op that is pretty much guaranteed to fail forever. A better return
code would be EIO instead of EAGAIN.

Cc: stable@vger.kernel.org
Reported-by: Steven Royer <seroyer@linux.vnet.ibm.com>
Tested-by: Steven Royer <seroyer@linux.vnet.ibm.com>
Signed-off-by: Bryant G. Ly <bgly@us.ibm.com>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 3d3768aaab4f..8fb5c54c7dd3 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3585,7 +3585,7 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
 			       1, 1);
 	if (rc) {
 		pr_err("srp_transfer_data() failed: %d\n", rc);
-		return -EAGAIN;
+		return -EIO;
 	}
 	/*
 	 * We now tell TCM to add this WRITE CDB directly into the TCM storage

From dd545b52a3e1efd9f2c6352dbe95ccd0c53461cc Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Tue, 10 Jan 2017 13:29:54 -0700
Subject: [PATCH 129/258] do_direct_IO: Use inode->i_blkbits to compute block
 count to be cleaned

The code currently uses sdio->blkbits to compute the number of blocks to
be cleaned. However sdio->blkbits is derived from the logical block size
of the underlying block device (Refer to the definition of
do_blockdev_direct_IO()). Due to this, generic/299 test would rarely
fail when executed on an ext4 filesystem with 64k as the block size and
when using a virtio based disk (having 512 byte as the logical block
size) inside a kvm guest.

This commit fixes the bug by using inode->i_blkbits to compute the
number of blocks to be cleaned.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>

Fixed up by Jeff Moyer to only use/evaluate inode->i_blkbits once,
to avoid issues with block size changes with IO in flight.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/direct-io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index aeae8c063451..c87bae4376b8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -906,6 +906,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 			struct buffer_head *map_bh)
 {
 	const unsigned blkbits = sdio->blkbits;
+	const unsigned i_blkbits = blkbits + sdio->blkfactor;
 	int ret = 0;
 
 	while (sdio->block_in_file < sdio->final_block_in_request) {
@@ -949,7 +950,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 					clean_bdev_aliases(
 						map_bh->b_bdev,
 						map_bh->b_blocknr,
-						map_bh->b_size >> blkbits);
+						map_bh->b_size >> i_blkbits);
 				}
 
 				if (!sdio->blkfactor)

From a14d749fcebe97ddf6af6db3d1f6ece85c9ddcb9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 08:56:23 -0700
Subject: [PATCH 130/258] virtio_blk: avoid DMA to stack for the sense buffer

Most users of BLOCK_PC requests allocate the sense buffer on the stack,
so to avoid DMA to the stack copy them to a field in the heap allocated
virtblk_req structure.  Without that any attempt at SCSI passthrough I/O,
including the SG_IO ioctl from userspace will crash the kernel.  Note that
this includes running tools like hdparm even when the host does not have
SCSI passthrough enabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/virtio_blk.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5545a679abd8..3c3b8f601469 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -56,6 +56,7 @@ struct virtblk_req {
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
 	u8 status;
+	u8 sense[SCSI_SENSE_BUFFERSIZE];
 	struct scatterlist sg[];
 };
 
@@ -102,7 +103,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	}
 
 	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
-		sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
+		memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
+		sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE);
 		sgs[num_out + num_in++] = &sense;
 		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
 		sgs[num_out + num_in++] = &inhdr;

From 25b4acfc7de0fc4da3bfea3a316f7282c6fbde81 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Mon, 9 Jan 2017 15:20:31 -0500
Subject: [PATCH 131/258] nbd: blk_mq_init_queue returns an error code on
 failure, not NULL

Additionally, don't assign directly to disk->queue, otherwise
blk_put_queue (called via put_disk) will choke (panic) on the errno
stored there.

Bug found by code inspection after Omar found a similar issue in
virtio_blk.  Compile-tested only.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nbd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 38c576f76d36..50a2020b5b72 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1042,6 +1042,7 @@ static int __init nbd_init(void)
 		return -ENOMEM;
 
 	for (i = 0; i < nbds_max; i++) {
+		struct request_queue *q;
 		struct gendisk *disk = alloc_disk(1 << part_shift);
 		if (!disk)
 			goto out;
@@ -1067,12 +1068,13 @@ static int __init nbd_init(void)
 		 * every gendisk to have its very own request_queue struct.
 		 * These structs are big so we dynamically allocate them.
 		 */
-		disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
-		if (!disk->queue) {
+		q = blk_mq_init_queue(&nbd_dev[i].tag_set);
+		if (IS_ERR(q)) {
 			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 			put_disk(disk);
 			goto out;
 		}
+		disk->queue = q;
 
 		/*
 		 * Tell the block layer that we are not a rotational device

From 6bf6b0aa3da84a3d9126919a94c49c0fb7ee2fb3 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 9 Jan 2017 11:44:12 -0800
Subject: [PATCH 132/258] virtio_blk: fix panic in initialization error path

If blk_mq_init_queue() returns an error, it gets assigned to
vblk->disk->queue. Then, when we call put_disk(), we end up calling
blk_put_queue() with the ERR_PTR, causing a bad dereference. Fix it by
only assigning to vblk->disk->queue on success.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/virtio_blk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 3c3b8f601469..10332c24f961 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -630,11 +630,12 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_put_disk;
 
-	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
+	q = blk_mq_init_queue(&vblk->tag_set);
 	if (IS_ERR(q)) {
 		err = -ENOMEM;
 		goto out_free_tags;
 	}
+	vblk->disk->queue = q;
 
 	q->queuedata = vblk;
 

From 7ee7f45a763bd68c3a606595a8c1bb08c3e6146b Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Wed, 11 Jan 2017 01:27:21 +0200
Subject: [PATCH 133/258] mei: bus: enable OS version only for SPT and newer

Sending OS version for support of TPM2_ChangeEPS() is required only
for SPT FW (HMB version 2.0) and newer.
On older platforms the command should be just ignored by the firmware
but some older platforms misbehave so it's safer to send the command
only if required.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=192051
Fixes: 7279b238bade (mei: send OS type to the FW)
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Tested-by: Jan Niehusmann <jan@gondor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus-fixup.c | 3 +++
 drivers/misc/mei/debugfs.c   | 2 ++
 drivers/misc/mei/hbm.c       | 4 ++++
 drivers/misc/mei/hw.h        | 6 ++++++
 drivers/misc/mei/mei_dev.h   | 2 ++
 5 files changed, 17 insertions(+)

diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 18e05ca7584f..3600c9993a98 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -152,6 +152,9 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev)
 {
 	int ret;
 
+	if (!cldev->bus->hbm_f_os_supported)
+		return;
+
 	ret = mei_cldev_enable(cldev);
 	if (ret)
 		return;
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index c6c051b52f55..c6217a4993ad 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -180,6 +180,8 @@ static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf,
 				 dev->hbm_f_ev_supported);
 		pos += scnprintf(buf + pos, bufsz - pos, "\tFA: %01d\n",
 				 dev->hbm_f_fa_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tOS: %01d\n",
+				 dev->hbm_f_os_supported);
 	}
 
 	pos += scnprintf(buf + pos, bufsz - pos, "pg:  %s, %s\n",
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index dd7f15a65eed..25b4a1ba522d 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -989,6 +989,10 @@ static void mei_hbm_config_features(struct mei_device *dev)
 	/* Fixed Address Client Support */
 	if (dev->version.major_version >= HBM_MAJOR_VERSION_FA)
 		dev->hbm_f_fa_supported = 1;
+
+	/* OS ver message Support */
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_OS)
+		dev->hbm_f_os_supported = 1;
 }
 
 /**
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
index 9daf3f9aed25..e1e4d47d4d7d 100644
--- a/drivers/misc/mei/hw.h
+++ b/drivers/misc/mei/hw.h
@@ -76,6 +76,12 @@
 #define HBM_MINOR_VERSION_FA               0
 #define HBM_MAJOR_VERSION_FA               2
 
+/*
+ * MEI version with OS ver message support
+ */
+#define HBM_MINOR_VERSION_OS               0
+#define HBM_MAJOR_VERSION_OS               2
+
 /* Host bus message command opcode */
 #define MEI_HBM_CMD_OP_MSK                  0x7f
 /* Host bus message command RESPONSE */
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 699693cd8c59..8dadb98662a9 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -406,6 +406,7 @@ const char *mei_pg_state_str(enum mei_pg_state state);
  * @hbm_f_ev_supported  : hbm feature event notification
  * @hbm_f_fa_supported  : hbm feature fixed address client
  * @hbm_f_ie_supported  : hbm feature immediate reply to enum request
+ * @hbm_f_os_supported  : hbm feature support OS ver message
  *
  * @me_clients_rwsem: rw lock over me_clients list
  * @me_clients  : list of FW clients
@@ -487,6 +488,7 @@ struct mei_device {
 	unsigned int hbm_f_ev_supported:1;
 	unsigned int hbm_f_fa_supported:1;
 	unsigned int hbm_f_ie_supported:1;
+	unsigned int hbm_f_os_supported:1;
 
 	struct rw_semaphore me_clients_rwsem;
 	struct list_head me_clients;

From 488debb9971bc7d0edd6d8080ba78ca02a04f6c4 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 5 Jan 2017 17:15:01 +0000
Subject: [PATCH 134/258] drivers: char: mem: Fix thinkos in kmem address
 checks

When borrowing the pfn_valid() check from mmap_kmem(), somebody managed
to get physical and virtual addresses spectacularly muddled up, such
that we've ended up with checks for one being the other. Whilst this
does indeed prevent out-of-bounds accesses crashing, on most systems
it also prevents the more desirable use-case of working at all ever.

Check the *virtual* offset correctly for what it is. Furthermore, do
so in the right place - a read or write may span multiple pages, so a
single up-front check is insufficient. High memory accesses already
have a similar validity check just before the copy_to_user() call, so
just make the low memory path fully consistent with that.

Reported-by: Jason A. Donenfeld <Jason@zx2c4.com>
CC: stable@vger.kernel.org
Fixes: 148a1bc84398 ("drivers: char: mem: Check {read,write}_kmem() addresses")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/mem.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 5bb1985ec484..6d9cc2d39d22 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -381,9 +381,6 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
 	char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
 	int err = 0;
 
-	if (!pfn_valid(PFN_DOWN(p)))
-		return -EIO;
-
 	read = 0;
 	if (p < (unsigned long) high_memory) {
 		low_count = count;
@@ -412,6 +409,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
 			 * by the kernel or data corruption may occur
 			 */
 			kbuf = xlate_dev_kmem_ptr((void *)p);
+			if (!virt_addr_valid(kbuf))
+				return -ENXIO;
 
 			if (copy_to_user(buf, kbuf, sz))
 				return -EFAULT;
@@ -482,6 +481,8 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
 		 * corruption may occur.
 		 */
 		ptr = xlate_dev_kmem_ptr((void *)p);
+		if (!virt_addr_valid(ptr))
+			return -ENXIO;
 
 		copied = copy_from_user(ptr, buf, sz);
 		if (copied) {
@@ -512,9 +513,6 @@ static ssize_t write_kmem(struct file *file, const char __user *buf,
 	char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
 	int err = 0;
 
-	if (!pfn_valid(PFN_DOWN(p)))
-		return -EIO;
-
 	if (p < (unsigned long) high_memory) {
 		unsigned long to_write = min_t(unsigned long, count,
 					       (unsigned long)high_memory - p);

From 89d8232411a85b9a6b12fd5da4d07d8a138a8e0c Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 13 Dec 2016 17:27:56 +0100
Subject: [PATCH 135/258] tty/serial: atmel_serial: BUG: stop DMA from
 transmitting in stop_tx

If we don't disable the transmitter in atmel_stop_tx, the DMA buffer
continues to send data until it is emptied.
This cause problems with the flow control (CTS is asserted and data are
still sent).

So, disabling the transmitter in atmel_stop_tx is a sane thing to do.

Tested on at91sam9g35-cm(DMA)
Tested for regressions on sama5d2-xplained(Fifo) and at91sam9g20ek(PDC)

Cc: <stable@vger.kernel.org> (beware, this won't apply before 4.3)
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 168b10cad47b..f9d42de5ab2d 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -481,6 +481,14 @@ static void atmel_stop_tx(struct uart_port *port)
 		/* disable PDC transmit */
 		atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
 	}
+
+	/*
+	 * Disable the transmitter.
+	 * This is mandatory when DMA is used, otherwise the DMA buffer
+	 * is fully transmitted.
+	 */
+	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS);
+
 	/* Disable interrupts */
 	atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
 
@@ -513,6 +521,9 @@ static void atmel_start_tx(struct uart_port *port)
 
 	/* Enable interrupts */
 	atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask);
+
+	/* re-enable the transmitter */
+	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
 }
 
 /*

From b389f173aaa1204d6dc1f299082a162eb0491545 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 6 Dec 2016 13:05:33 +0100
Subject: [PATCH 136/258] tty/serial: atmel: RS485 half duplex w/DMA: enable RX
 after TX is done

When using RS485 in half duplex, RX should be enabled when TX is
finished, and stopped when TX starts.

Before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half
duplex with DMA"), RX was not disabled in atmel_start_tx() if the DMA
was used. So, collisions could happened.

But disabling RX in atmel_start_tx() uncovered another bug:
RX was enabled again in the wrong place (in atmel_tx_dma) instead of
being enabled when TX is finished (in atmel_complete_tx_dma), so the
transmission simply stopped.

This bug was not triggered before commit 0058f0871efe7b01c6
("tty/serial: atmel: fix RS485 half duplex with DMA") because RX was
never disabled before.

Moving atmel_start_rx() in atmel_complete_tx_dma() corrects the problem.

Cc: stable@vger.kernel.org
Reported-by: Gil Weber <webergil@gmail.com>
Fixes: 0058f0871efe7b01c6
Tested-by: Gil Weber <webergil@gmail.com>
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index f9d42de5ab2d..fabbe76203bb 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -809,6 +809,11 @@ static void atmel_complete_tx_dma(void *arg)
 	 */
 	if (!uart_circ_empty(xmit))
 		atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx);
+	else if ((port->rs485.flags & SER_RS485_ENABLED) &&
+		 !(port->rs485.flags & SER_RS485_RX_DURING_TX)) {
+		/* DMA done, stop TX, start RX for RS485 */
+		atmel_start_rx(port);
+	}
 
 	spin_unlock_irqrestore(&port->lock, flags);
 }
@@ -911,12 +916,6 @@ static void atmel_tx_dma(struct uart_port *port)
 		desc->callback = atmel_complete_tx_dma;
 		desc->callback_param = atmel_port;
 		atmel_port->cookie_tx = dmaengine_submit(desc);
-
-	} else {
-		if (port->rs485.flags & SER_RS485_ENABLED) {
-			/* DMA done, stop TX, start RX for RS485 */
-			atmel_start_rx(port);
-		}
 	}
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)

From c130b666a9a711f985a0a44b58699ebe14bb7245 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
Date: Wed, 28 Dec 2016 16:42:00 -0200
Subject: [PATCH 137/258] 8250_pci: Fix potential use-after-free in error path

Commit f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during
PCI error recovery") introduces a potential use-after-free in case the
pciserial_init_ports call in serial8250_io_resume fails, which may
happen if a memory allocation fails or if the .init quirk failed for
whatever reason).  If this happen, further pci_get_drvdata will return a
pointer to freed memory.

This patch reworks the PCI recovery resume hook to restore the old priv
structure in this case, which should be ok, since the ports were already
detached. Such error during recovery causes us to give up on the
recovery.

Fixes: f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during
  PCI error recovery")
Reported-by: Michal Suchanek <msuchanek@suse.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index aa0166b6d450..116436b7fa52 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5642,17 +5642,15 @@ static pci_ers_result_t serial8250_io_slot_reset(struct pci_dev *dev)
 static void serial8250_io_resume(struct pci_dev *dev)
 {
 	struct serial_private *priv = pci_get_drvdata(dev);
-	const struct pciserial_board *board;
+	struct serial_private *new;
 
 	if (!priv)
 		return;
 
-	board = priv->board;
-	kfree(priv);
-	priv = pciserial_init_ports(dev, board);
-
-	if (!IS_ERR(priv)) {
-		pci_set_drvdata(dev, priv);
+	new = pciserial_init_ports(dev, priv->board);
+	if (!IS_ERR(new)) {
+		pci_set_drvdata(dev, new);
+		kfree(priv);
 	}
 }
 

From 2bed8a8e70729f996af92042d3ad0f11870acc1f Mon Sep 17 00:00:00 2001
From: Daniel Jedrychowski <avistel@gmail.com>
Date: Mon, 12 Dec 2016 09:18:28 +1100
Subject: [PATCH 138/258] Clearing FIFOs in RS485 emulation mode causes
 subsequent transmits to break

When in RS485 emulation mode, __do_stop_tx_rs485() calls
serial8250_clear_fifos().  This not only clears the FIFOs, but also sets
all bits in their control register (UART_FCR) to 0.

One of the effects of this is the disabling of the FIFOs, which turns
them into single-byte holding registers.  The rest of the driver doesn't
know this, which results in the lions share of characters passed into a
write call to be dropped.

(I can supply logic analyzer screenshots if necessary)

This fix replaces the serial8250_clear_fifos() call to
serial8250_clear_and_reinit_fifos() - this prevents the "dropped
characters" issue from manifesting again while retaining the requirement
of clearing the RX FIFO after transmission if the SER_RS485_RX_DURING_TX
flag is disabled.

Signed-off-by: Daniel Jedrychowski <avistel@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index fe4399b41df6..c13fec451d03 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1413,7 +1413,7 @@ static void __do_stop_tx_rs485(struct uart_8250_port *p)
 	 * Enable previously disabled RX interrupts.
 	 */
 	if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) {
-		serial8250_clear_fifos(p);
+		serial8250_clear_and_reinit_fifos(p);
 
 		p->ier |= UART_IER_RLSI | UART_IER_RDI;
 		serial_port_out(&p->port, UART_IER, p->ier);

From 6741f551a0b26479de2532ffa43a366747e6dbf3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 11 Dec 2016 10:05:49 +0800
Subject: [PATCH 139/258] Revert "tty: serial: 8250: add CON_CONSDEV to flags"

This commit needs to be reverted because it prevents people from
using the serial console as a secondary console with input being
directed to tty0.

IOW, if you boot with console=ttyS0 console=tty0 then all kernels
prior to this commit will produce output on both ttyS0 and tty0
but input will only be taken from tty0.  With this patch the serial
console will always be the primary console instead of tty0,
potentially preventing people from getting into their machines in
emergency situations.

Fixes: d03516df8375 ("tty: serial: 8250: add CON_CONSDEV to flags")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 61569a765d9e..76e03a7de9cc 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -675,7 +675,7 @@ static struct console univ8250_console = {
 	.device		= uart_console_device,
 	.setup		= univ8250_console_setup,
 	.match		= univ8250_console_match,
-	.flags		= CON_PRINTBUFFER | CON_ANYTIME | CON_CONSDEV,
+	.flags		= CON_PRINTBUFFER | CON_ANYTIME,
 	.index		= -1,
 	.data		= &serial8250_reg,
 };

From 5b11ebedd6a8bb4271b796e498cd15c0fe1133b6 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sat, 3 Dec 2016 16:56:49 +0800
Subject: [PATCH 140/258] extcon: return error code on failure

Function get_zeroed_page() returns a NULL pointer if there is no enough
memory. In function extcon_sync(), it returns 0 if the call to
get_zeroed_page() fails. The return value 0 indicates success in the
context, which is incosistent with the execution status. This patch
fixes the bug by returning -ENOMEM.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188611

Signed-off-by: Pan Bian <bianpan2016@163.com>
Fixes: a580982f0836e
Cc: stable <stable@vger.kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/extcon/extcon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
index 78298460d168..7c1e3a7b14e0 100644
--- a/drivers/extcon/extcon.c
+++ b/drivers/extcon/extcon.c
@@ -453,7 +453,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id)
 		dev_err(&edev->dev, "out of memory in extcon_set_state\n");
 		kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE);
 
-		return 0;
+		return -ENOMEM;
 	}
 
 	length = name_show(&edev->dev, NULL, prop_buf);

From 0fa2c8eb270413160557babda519aa3c21e2bfaf Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 2 Dec 2016 16:23:55 +0000
Subject: [PATCH 141/258] ppdev: don't print a free'd string

A previous fix of a memory leak now prints the string 'name'
that was previously free'd.  Fix this by free'ing the string
at the end of the function and adding an error exit path for
the error conditions.

CoverityScan CID#1384523 ("Use after free")

Fixes: 2bd362d5f45c1 ("ppdev: fix memory leak")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/ppdev.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 02819e0703c8..87885d146dbb 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -290,6 +290,7 @@ static int register_device(int minor, struct pp_struct *pp)
 	struct pardevice *pdev = NULL;
 	char *name;
 	struct pardev_cb ppdev_cb;
+	int rc = 0;
 
 	name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
 	if (name == NULL)
@@ -298,8 +299,8 @@ static int register_device(int minor, struct pp_struct *pp)
 	port = parport_find_number(minor);
 	if (!port) {
 		pr_warn("%s: no associated port!\n", name);
-		kfree(name);
-		return -ENXIO;
+		rc = -ENXIO;
+		goto err;
 	}
 
 	memset(&ppdev_cb, 0, sizeof(ppdev_cb));
@@ -308,16 +309,18 @@ static int register_device(int minor, struct pp_struct *pp)
 	ppdev_cb.private = pp;
 	pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
 	parport_put_port(port);
-	kfree(name);
 
 	if (!pdev) {
 		pr_warn("%s: failed to register device!\n", name);
-		return -ENXIO;
+		rc = -ENXIO;
+		goto err;
 	}
 
 	pp->pdev = pdev;
 	dev_dbg(&pdev->dev, "registered pardevice\n");
-	return 0;
+err:
+	kfree(name);
+	return rc;
 }
 
 static enum ieee1284_phase init_phase(int mode)

From 802c03881f29844af0252b6e22be5d2f65f93fd0 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 6 Jan 2017 02:14:16 +0900
Subject: [PATCH 142/258] sysrq: attach sysrq handler correctly for 32-bit
 kernel

The sysrq input handler should be attached to the input device which has
a left alt key.

On 32-bit kernels, some input devices which has a left alt key cannot
attach sysrq handler.  Because the keybit bitmap in struct input_device_id
for sysrq is not correctly initialized.  KEY_LEFTALT is 56 which is
greater than BITS_PER_LONG on 32-bit kernels.

I found this problem when using a matrix keypad device which defines
a KEY_LEFTALT (56) but doesn't have a KEY_O (24 == 56%32).

Cc: Jiri Slaby <jslaby@suse.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/sysrq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 52bbd27e93ae..701c085bb19b 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -946,8 +946,8 @@ static const struct input_device_id sysrq_ids[] = {
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { BIT_MASK(KEY_LEFTALT) },
+		.evbit = { [BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_LEFTALT)] = BIT_MASK(KEY_LEFTALT) },
 	},
 	{ },
 };

From 546cf3ef9c92b76ff0037c871b939e63caea98b3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 26 Dec 2016 09:58:34 -0800
Subject: [PATCH 143/258] auxdisplay: fix new ht16k33 build errors

Fix build errors caused by selecting incorrect kconfig symbols.

drivers/built-in.o:(.data+0x19cec): undefined reference to `sys_fillrect'
drivers/built-in.o:(.data+0x19cf0): undefined reference to `sys_copyarea'
drivers/built-in.o:(.data+0x19cf4): undefined reference to `sys_imageblit'

Fixes: 31114fa95bdb (auxdisplay: ht16k33: select framebuffer helper modules)

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Acked-by: Robin van der Gracht <robin@protonic.nl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/auxdisplay/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index 4ef4c5caed4f..8a8e403644d6 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -132,9 +132,9 @@ config HT16K33
 	tristate "Holtek Ht16K33 LED controller with keyscan"
 	depends on FB && OF && I2C && INPUT
 	select FB_SYS_FOPS
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
+	select FB_SYS_FILLRECT
+	select FB_SYS_COPYAREA
+	select FB_SYS_IMAGEBLIT
 	select INPUT_MATRIXKMAP
 	select FB_BACKLIGHT
 	help

From 24b91e360ef521a2808771633d76ebc68bd5604b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 4 Jan 2017 15:12:04 +0100
Subject: [PATCH 144/258] nohz: Fix collision between tick and other hrtimers

When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.

In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.

Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.

In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:

      Task in a full dynticks CPU
      ----------------------------

      * hrtimer A is queued 2 seconds ahead
      * the tick is stopped, scheduled 1 second ahead
      * tick fires 1 second later
      * on tick exit, nohz schedules the tick 1 second ahead but sees
        the clockevent device is already programmed to that deadline,
        fooled by hrtimer A, the tick isn't rescheduled.
      * hrtimer A is cancelled before its deadline
      * tick never fires again until an interrupt happens...

In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.

On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.

Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Rik van Riel <riel@redhat.com>
Link: http://lkml.kernel.org/r/1483539124-5693-1-git-send-email-fweisbec@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 9 +++++++--
 kernel/time/tick-sched.h | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2c115fdab397..74e0388cc88d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -767,7 +767,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	tick = expires;
 
 	/* Skip reprogram of event if its not changed */
-	if (ts->tick_stopped && (expires == dev->next_event))
+	if (ts->tick_stopped && (expires == ts->next_tick))
 		goto out;
 
 	/*
@@ -787,6 +787,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
+	ts->next_tick = tick;
+
 	/*
 	 * If the expiration time == KTIME_MAX, then we simply stop
 	 * the tick timer.
@@ -802,7 +804,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	else
 		tick_program_event(tick, 1);
 out:
-	/* Update the estimated sleep length */
+	/*
+	 * Update the estimated sleep length until the next timer
+	 * (not only the tick).
+	 */
 	ts->sleep_length = ktime_sub(dev->next_event, now);
 	return tick;
 }
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bf38226e5c17..075444e3d48e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -27,6 +27,7 @@ enum tick_nohz_mode {
  *			timer is modified for nohz sleeps. This is necessary
  *			to resume the tick timer operation in the timeline
  *			when the CPU returns from nohz sleep.
+ * @next_tick:		Next tick to be fired when in dynticks mode.
  * @tick_stopped:	Indicator that the idle tick has been stopped
  * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
  * @idle_calls:		Total number of idle calls
@@ -44,6 +45,7 @@ struct tick_sched {
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
 	ktime_t				last_tick;
+	ktime_t				next_tick;
 	int				inidle;
 	int				tick_stopped;
 	unsigned long			idle_jiffies;

From c8a6a09c1c617402cc9254b2bc8da359a0347d75 Mon Sep 17 00:00:00 2001
From: Augusto Mecking Caringi <augustocaringi@gmail.com>
Date: Tue, 10 Jan 2017 10:45:00 +0000
Subject: [PATCH 145/258] vme: Fix wrong pointer utilization in
 ca91cx42_slave_get
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ca91cx42_slave_get function, the value pointed by vme_base pointer is
set through:

*vme_base = ioread32(bridge->base + CA91CX42_VSI_BS[i]);

So it must be dereferenced to be used in calculation of pci_base:

*pci_base = (dma_addr_t)*vme_base + pci_offset;

This bug was caught thanks to the following gcc warning:

drivers/vme/bridges/vme_ca91cx42.c: In function ‘ca91cx42_slave_get’:
drivers/vme/bridges/vme_ca91cx42.c:467:14: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
*pci_base = (dma_addr_t)vme_base + pci_offset;

Signed-off-by: Augusto Mecking Caringi <augustocaringi@gmail.com>
Acked-By: Martyn Welch <martyn@welchs.me.uk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vme/bridges/vme_ca91cx42.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c
index 6b5ee896af63..7cc51223db1c 100644
--- a/drivers/vme/bridges/vme_ca91cx42.c
+++ b/drivers/vme/bridges/vme_ca91cx42.c
@@ -464,7 +464,7 @@ static int ca91cx42_slave_get(struct vme_slave_resource *image, int *enabled,
 	vme_bound = ioread32(bridge->base + CA91CX42_VSI_BD[i]);
 	pci_offset = ioread32(bridge->base + CA91CX42_VSI_TO[i]);
 
-	*pci_base = (dma_addr_t)vme_base + pci_offset;
+	*pci_base = (dma_addr_t)*vme_base + pci_offset;
 	*size = (unsigned long long)((vme_bound - *vme_base) + granularity);
 
 	*enabled = 0;

From 69d012345a1a32d3f03957f14d972efccc106a98 Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie.huang@arm.com>
Date: Wed, 11 Jan 2017 14:02:00 +0800
Subject: [PATCH 146/258] arm64: hugetlb: fix the wrong return value for
 huge_ptep_set_access_flags

In current code, the @changed always returns the last one's status for
the huge page with the contiguous bit set. This is really not what we
want. Even one of the PTEs is changed, we should tell it to the caller.

This patch fixes this issue.

Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
Cc: <stable@vger.kernel.org> # 4.5.x-
Signed-off-by: Huang Shijie <shijie.huang@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/hugetlbpage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 964b7549af5c..e25584d72396 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -239,7 +239,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
 					  *cpte, &pgsize);
 		for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) {
-			changed = ptep_set_access_flags(vma, addr, cpte,
+			changed |= ptep_set_access_flags(vma, addr, cpte,
 							pfn_pte(pfn,
 								hugeprot),
 							dirty);

From 2d5a9c72d0c4ac73cf97f4b7814ed6c44b1e49ae Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:18 +0100
Subject: [PATCH 147/258] USB: serial: ch341: fix control-message error
 handling

A short control transfer would currently fail to be detected, something
which could lead to stale buffer data being used as valid input.

Check for short transfers, and make sure to log any transfer errors.

Note that this also avoids leaking heap data to user space (TIOCMGET)
and the remote device (break control).

Fixes: 6ce76104781a ("USB: Driver for CH341 USB-serial adaptor")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 8d7b0847109b..95aa5233726c 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -113,6 +113,8 @@ static int ch341_control_out(struct usb_device *dev, u8 request,
 	r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request,
 			    USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
 			    value, index, NULL, 0, DEFAULT_TIMEOUT);
+	if (r < 0)
+		dev_err(&dev->dev, "failed to send control message: %d\n", r);
 
 	return r;
 }
@@ -130,7 +132,20 @@ static int ch341_control_in(struct usb_device *dev,
 	r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request,
 			    USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
 			    value, index, buf, bufsize, DEFAULT_TIMEOUT);
-	return r;
+	if (r < bufsize) {
+		if (r >= 0) {
+			dev_err(&dev->dev,
+				"short control message received (%d < %u)\n",
+				r, bufsize);
+			r = -EIO;
+		}
+
+		dev_err(&dev->dev, "failed to receive control message: %d\n",
+			r);
+		return r;
+	}
+
+	return 0;
 }
 
 static int ch341_set_baudrate_lcr(struct usb_device *dev,
@@ -181,9 +196,9 @@ static int ch341_set_handshake(struct usb_device *dev, u8 control)
 
 static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv)
 {
+	const unsigned int size = 2;
 	char *buffer;
 	int r;
-	const unsigned size = 8;
 	unsigned long flags;
 
 	buffer = kmalloc(size, GFP_KERNEL);
@@ -194,14 +209,9 @@ static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv)
 	if (r < 0)
 		goto out;
 
-	/* setup the private status if available */
-	if (r == 2) {
-		r = 0;
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT;
-		spin_unlock_irqrestore(&priv->lock, flags);
-	} else
-		r = -EPROTO;
+	spin_lock_irqsave(&priv->lock, flags);
+	priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT;
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 out:	kfree(buffer);
 	return r;
@@ -211,9 +221,9 @@ out:	kfree(buffer);
 
 static int ch341_configure(struct usb_device *dev, struct ch341_private *priv)
 {
+	const unsigned int size = 2;
 	char *buffer;
 	int r;
-	const unsigned size = 8;
 
 	buffer = kmalloc(size, GFP_KERNEL);
 	if (!buffer)

From 6d6daa20945f3f598e56e18d1f926c08754f5801 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 5 Jan 2017 10:09:25 -0500
Subject: [PATCH 148/258] perf/x86/intel/uncore: Fix hardcoded socket 0
 assumption in the Haswell init code

hswep_uncore_cpu_init() uses a hardcoded physical package id 0 for the boot
cpu. This works as long as the boot CPU is actually on the physical package
0, which is normaly the case after power on / reboot.

But it fails with a NULL pointer dereference when a kdump kernel is started
on a secondary socket which has a different physical package id because the
locigal package translation for physical package 0 does not exist.

Use the logical package id of the boot cpu instead of hard coded 0.

[ tglx: Rewrote changelog once more ]

Fixes: cf6d445f6897 ("perf/x86/uncore: Track packages, not per CPU data")
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Harish Chegondi <harish.chegondi@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1483628965-2890-1-git-send-email-prarit@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/events/intel/uncore_snbep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index e6832be714bc..dae2fedc1601 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
 
 void hswep_uncore_cpu_init(void)
 {
-	int pkg = topology_phys_to_logical_pkg(0);
+	int pkg = boot_cpu_data.logical_proc_id;
 
 	if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;

From c38c39bf7cc04d688291f382469e84ec2a8548a4 Mon Sep 17 00:00:00 2001
From: Cedric Izoard <Cedric.Izoard@ceva-dsp.com>
Date: Wed, 11 Jan 2017 14:39:07 +0000
Subject: [PATCH 149/258] mac80211: Fix headroom allocation when forwarding
 mesh pkt

This patch fix issue introduced by my previous commit that
tried to ensure enough headroom was present, and instead
broke it.

When forwarding mesh pkt, mac80211 may also add security header,
and it must therefore be taken into account in the needed headroom.

Fixes: d8da0b5d64d5 ("mac80211: Ensure enough headroom when forwarding mesh pkt")
Signed-off-by: Cedric Izoard <cedric.izoard@ceva-dsp.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c037c5bb6167..c87e61358b77 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2472,7 +2472,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	if (!ifmsh->mshcfg.dot11MeshForwarding)
 		goto out;
 
-	fwd_skb = skb_copy_expand(skb, local->tx_headroom, 0, GFP_ATOMIC);
+	fwd_skb = skb_copy_expand(skb, local->tx_headroom +
+				       sdata->encrypt_headroom, 0, GFP_ATOMIC);
 	if (!fwd_skb) {
 		net_info_ratelimited("%s: failed to clone mesh frame\n",
 				    sdata->name);

From d7f842442f766db3f39fc5d166ddcc24bf817056 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 25 Oct 2016 10:32:16 +0300
Subject: [PATCH 150/258] mac80211: fix the TID on NDPs sent as EOSP carrier

In the commit below, I forgot to translate the mac80211's
AC to QoS IE order. Moreover, the condition in the if was
wrong. Fix both issues.
This bug would hit only with clients that didn't set all
the ACs as delivery enabled.

Fixes: f438ceb81d4 ("mac80211: uapsd_queues is in QoS IE order")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index b6cfcf038c11..50c309094c37 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1501,8 +1501,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
 
 		/* This will evaluate to 1, 3, 5 or 7. */
 		for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++)
-			if (ignored_acs & BIT(ac))
-				continue;
+			if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac]))
+				break;
 		tid = 7 - 2 * ac;
 
 		ieee80211_send_null_response(sta, tid, reason, true, false);

From 06f7c88c107fb469f4f1344142e80df5175c6836 Mon Sep 17 00:00:00 2001
From: Beni Lev <beni.lev@intel.com>
Date: Tue, 19 Jul 2016 19:28:56 +0300
Subject: [PATCH 151/258] cfg80211: consider VHT opmode on station update

Currently, this attribute is only fetched on station addition, but
not on station change. Since this info is only present in the assoc
request, with full station state support in the driver it cannot be
present when the station is added.

Thus, add support for changing the VHT opmode on station update if
done before (or while) the station is marked as associated. After
this, ignore it, since it used to be ignored.

Signed-off-by: Beni Lev <beni.lev@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  4 +++-
 net/wireless/nl80211.c       | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6b76e3b0c18e..bea982af9cfb 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1772,7 +1772,9 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode
  *	Notification Element based on association request when used with
- *	%NL80211_CMD_NEW_STATION; u8 attribute.
+ *	%NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when
+ *	%NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS);
+ *	u8 attribute.
  *
  * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if
  *	%NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ef5eff93a8b8..5c1b267e22be 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4615,6 +4615,15 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 		break;
 	}
 
+	/*
+	 * Older kernel versions ignored this attribute entirely, so don't
+	 * reject attempts to update it but mark it as unused instead so the
+	 * driver won't look at the data.
+	 */
+	if (statype != CFG80211_STA_AP_CLIENT_UNASSOC &&
+	    statype != CFG80211_STA_TDLS_PEER_SETUP)
+		params->opmode_notif_used = false;
+
 	return 0;
 }
 EXPORT_SYMBOL(cfg80211_check_station_change);
@@ -4854,6 +4863,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.local_pm = pm;
 	}
 
+	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
+		params.opmode_notif_used = true;
+		params.opmode_notif =
+			nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
+	}
+
 	/* Include parameters for TDLS peer (will check later) */
 	err = nl80211_set_station_tdls(info, &params);
 	if (err)

From 96aa2e7cf126773b16c6c19b7474a8a38d3c707e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 7 Oct 2016 12:23:49 +0200
Subject: [PATCH 152/258] mac80211: calculate min channel width correctly

In the current minimum chandef code there's an issue in that the
recalculation can happen after rate control is initialized for a
station that has a wider bandwidth than the current chanctx, and
then rate control can immediately start using those higher rates
which could cause problems.

Observe that first of all that this problem is because we don't
take non-associated and non-uploaded stations into account. The
restriction to non-associated is quite pointless and is one of
the causes for the problem described above, since the rate init
will happen before the station is set to associated; no frames
could actually be sent until associated, but the rate table can
already contain higher rates and that might cause problems.

Also, rejecting non-uploaded stations is wrong, since the rate
control can select higher rates for those as well.

Secondly, it's then necessary to recalculate the minimal config
before initializing rate control, so that when rate control is
initialized, the higher rates are already available. This can be
done easily by adding the necessary function call in rate init.

Change-Id: Ib9bc02d34797078db55459d196993f39dcd43070
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/chan.c | 3 ---
 net/mac80211/rate.c | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e75cbf6ecc26..a0d901d8992e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -231,9 +231,6 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata)
 		    !(sta->sdata->bss && sta->sdata->bss == sdata->bss))
 			continue;
 
-		if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC))
-			continue;
-
 		max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta));
 	}
 	rcu_read_unlock();
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 206698bc93f4..9e2641d45587 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -40,6 +40,8 @@ void rate_control_rate_init(struct sta_info *sta)
 
 	ieee80211_sta_set_rx_nss(sta);
 
+	ieee80211_recalc_min_chandef(sta->sdata);
+
 	if (!ref)
 		return;
 

From d2941df8fbd9708035d66d889ada4d3d160170ce Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 20 Oct 2016 08:52:50 +0200
Subject: [PATCH 153/258] mac80211: recalculate min channel width on VHT opmode
 changes

When an associated station changes its VHT operating mode this
can/will affect the bandwidth it's using, and consequently we
must recalculate the minimum bandwidth we need to use. Failure
to do so can lead to one of two scenarios:
 1) we use a too high bandwidth, this is benign
 2) we use a too narrow bandwidth, causing rate control and
    actual PHY configuration to be out of sync, which can in
    turn cause problems/crashes

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 21 +++++++++++++++++++++
 net/mac80211/rx.c    |  9 +--------
 net/mac80211/vht.c   |  4 +++-
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 41497b670e2b..d37ae7dc114b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -6,6 +6,7 @@
  * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright (c) 2016        Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1295,6 +1296,26 @@ static void ieee80211_iface_work(struct work_struct *work)
 		} else if (ieee80211_is_action(mgmt->frame_control) &&
 			   mgmt->u.action.category == WLAN_CATEGORY_VHT) {
 			switch (mgmt->u.action.u.vht_group_notif.action_code) {
+			case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+				struct ieee80211_rx_status *status;
+				enum nl80211_band band;
+				u8 opmode;
+
+				status = IEEE80211_SKB_RXCB(skb);
+				band = status->band;
+				opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+				mutex_lock(&local->sta_mtx);
+				sta = sta_info_get_bss(sdata, mgmt->sa);
+
+				if (sta)
+					ieee80211_vht_handle_opmode(sdata, sta,
+								    opmode,
+								    band);
+
+				mutex_unlock(&local->sta_mtx);
+				break;
+			}
 			case WLAN_VHT_ACTION_GROUPID_MGMT:
 				ieee80211_process_mu_groups(sdata, mgmt);
 				break;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c87e61358b77..3090dd4342f6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2881,17 +2881,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 
 		switch (mgmt->u.action.u.vht_opmode_notif.action_code) {
 		case WLAN_VHT_ACTION_OPMODE_NOTIF: {
-			u8 opmode;
-
 			/* verify opmode is present */
 			if (len < IEEE80211_MIN_ACTION_SIZE + 2)
 				goto invalid;
-
-			opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
-
-			ieee80211_vht_handle_opmode(rx->sdata, rx->sta,
-						    opmode, status->band);
-			goto handled;
+			goto queue;
 		}
 		case WLAN_VHT_ACTION_GROUPID_MGMT: {
 			if (len < IEEE80211_MIN_ACTION_SIZE + 25)
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 6832bf6ab69f..43e45bb660bc 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -527,8 +527,10 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 
 	u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band);
 
-	if (changed > 0)
+	if (changed > 0) {
+		ieee80211_recalc_min_chandef(sdata);
 		rate_control_rate_update(local, sband, sta, changed);
+	}
 }
 
 void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,

From ad5013d5699d30ded0cdbbc68b93b2aa28222c6e Mon Sep 17 00:00:00 2001
From: Colin King <colin.king@canonical.com>
Date: Wed, 11 Jan 2017 11:43:10 +0000
Subject: [PATCH 154/258] perf/x86/intel: Use ULL constant to prevent undefined
 shift behaviour

When x86_pmu.num_counters is 32 the shift of the integer constant 1 is
exceeding 32bit and therefor undefined behaviour.

Fix this by shifting 1ULL instead of 1.

Reported-by: CoverityScan CID#1192105 ("Bad bit shift operation")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Link: http://lkml.kernel.org/r/20170111114310.17928-1-colin.king@canonical.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/events/intel/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 86138267b68a..d611cab214a6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3987,7 +3987,7 @@ __init int intel_pmu_init(void)
 		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
 		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
 	}
-	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
 
 	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",

From d6169d04097fd9ddf811e63eae4e5cd71e6666e2 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 11 Jan 2017 17:10:34 +0200
Subject: [PATCH 155/258] xhci: fix deadlock at host remove by running watchdog
 correctly

If a URB is killed while the host is removed we can end up in a situation
where the hub thread takes the roothub device lock, and waits for
the URB to be given back by xhci-hcd, blocking the host remove code.

xhci-hcd tries to stop the endpoint and give back the urb, but can't
as the host is removed from PCI bus at the same time, preventing the normal
way of giving back urb.

Instead we need to rely on the stop command timeout function to give back
the urb. This xhci_stop_endpoint_command_watchdog() timeout function
used a XHCI_STATE_DYING flag to indicate if the timeout function is already
running, but later this flag has been taking into use in other places to
mark that xhci is dying.

Remove checks for XHCI_STATE_DYING in xhci_urb_dequeue. We are still
checking that reading from pci state does not return 0xffffffff or that
host is not halted before trying to stop the endpoint.

This whole area of stopping endpoints, giving back URBs, and the wathdog
timeout need rework, this fix focuses on solving a specific deadlock
issue that we can then send to stable before any major rework.

Cc: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 11 -----------
 drivers/usb/host/xhci.c      | 13 -------------
 2 files changed, 24 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 25f522b09dd9..e32029a31ca4 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -913,17 +913,6 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
 	spin_lock_irqsave(&xhci->lock, flags);
 
 	ep->stop_cmds_pending--;
-	if (xhci->xhc_state & XHCI_STATE_REMOVING) {
-		spin_unlock_irqrestore(&xhci->lock, flags);
-		return;
-	}
-	if (xhci->xhc_state & XHCI_STATE_DYING) {
-		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-				"Stop EP timer ran, but another timer marked "
-				"xHCI as DYING, exiting.");
-		spin_unlock_irqrestore(&xhci->lock, flags);
-		return;
-	}
 	if (!(ep->stop_cmds_pending == 0 && (ep->ep_state & EP_HALT_PENDING))) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
 				"Stop EP timer ran, but no command pending, "
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0c8deb9ed42d..9a0ec116654a 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1534,19 +1534,6 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 		xhci_urb_free_priv(urb_priv);
 		return ret;
 	}
-	if ((xhci->xhc_state & XHCI_STATE_DYING) ||
-			(xhci->xhc_state & XHCI_STATE_HALTED)) {
-		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-				"Ep 0x%x: URB %p to be canceled on "
-				"non-responsive xHCI host.",
-				urb->ep->desc.bEndpointAddress, urb);
-		/* Let the stop endpoint command watchdog timer (which set this
-		 * state) finish cleaning up the endpoint TD lists.  We must
-		 * have caught it in the middle of dropping a lock and giving
-		 * back an URB.
-		 */
-		goto done;
-	}
 
 	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
 	ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index];

From 1392370ee7de8aa3f69936f55bea6bfcc9879c59 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Jan 2017 14:29:02 +0300
Subject: [PATCH 156/258] nvme-rdma: fix nvme_rdma_queue_is_ready

Now that we don't abuse the cmd field in struct request for nvme command
passthrough this function needs to be converted to the proper accessor
as well.

Fixes: d49187e97e ("nvme: introduce struct nvme_request")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/nvme/host/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f587af345889..34e564857716 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1422,7 +1422,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
-		struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
+		struct nvme_command *cmd = nvme_req(rq)->cmd;
 
 		if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
 		    cmd->common.opcode != nvme_fabrics_command ||

From b5a10c5f7532b7473776da87e67f8301bbc32693 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Wed, 28 Dec 2016 22:13:15 -0200
Subject: [PATCH 157/258] nvme: apply DELAY_BEFORE_CHK_RDY quirk at probe time
 too

Commit 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter
readiness") introduced a quirk to adapters that cannot read the bit
NVME_CSTS_RDY right after register NVME_REG_CC is set; these adapters
need a delay or else the action of reading the bit NVME_CSTS_RDY could
somehow corrupt adapter's registers state and it never recovers.

When this quirk was added, we checked ctrl->tagset in order to avoid
quirking in probe time, supposing we would never require such delay
during probe. Well, it was too optimistic; we in fact need this quirk
at probe time in some cases, like after a kexec.

In some experiments, after abnormal shutdown of machine (aka power cord
unplug), we booted into our bootloader in Power, which is a Linux kernel,
and kexec'ed into another distro. If this kexec is too quick, we end up
reaching the probe of NVMe adapter in that distro when adapter is in
bad state (not fully initialized on our bootloader). What happens next
is that nvme_wait_ready() is unable to complete, except if the quirk is
enabled.

So, this patch removes the original ctrl->tagset verification in order
to enable the quirk even on probe time.

Fixes: 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter readiness")
Reported-by: Andrew Byrne <byrneadw@ie.ibm.com>
Reported-by: Jaime A. H. Gomez <jahgomez@mx1.ibm.com>
Reported-by: Zachary D. Myers <zdmyers@us.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Acked-by: Jeffrey Lien <Jeff.Lien@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2fc86dc7a8df..8a3c3e32a704 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1106,12 +1106,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
 	if (ret)
 		return ret;
 
-	/* Checking for ctrl->tagset is a trick to avoid sleeping on module
-	 * load, since we only need the quirk on reset_controller. Notice
-	 * that the HGST device needs this delay only in firmware activation
-	 * procedure; unfortunately we have no (easy) way to verify this.
-	 */
-	if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset)
+	if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
 		msleep(NVME_QUIRK_DELAY_AMOUNT);
 
 	return nvme_wait_ready(ctrl, cap, false);

From 0a417b8dc1f10b03e8f558b8a831f07ec4c23795 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 11 Jan 2017 10:20:04 -0800
Subject: [PATCH 158/258] xfs: Timely free truncated dirty pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 99579ccec4e2 "xfs: skip dirty pages in ->releasepage()" started
to skip dirty pages in xfs_vm_releasepage() which also has the effect
that if a dirty page is truncated, it does not get freed by
block_invalidatepage() and is lingering in LRU list waiting for reclaim.
So a simple loop like:

while true; do
	dd if=/dev/zero of=file bs=1M count=100
	rm file
done

will keep using more and more memory until we hit low watermarks and
start pagecache reclaim which will eventually reclaim also the truncate
pages. Keeping these truncated (and thus never usable) pages in memory
is just a waste of memory, is unnecessarily stressing page cache
reclaim, and reportedly also leads to anonymous mmap(2) returning ENOMEM
prematurely.

So instead of just skipping dirty pages in xfs_vm_releasepage(), return
to old behavior of skipping them only if they have delalloc or unwritten
buffers and fix the spurious warnings by warning only if the page is
clean.

CC: stable@vger.kernel.org
CC: Brian Foster <bfoster@redhat.com>
CC: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Petr Tůma <petr.tuma@d3s.mff.cuni.cz>
Fixes: 99579ccec4e271c3d4d4e7c946058766812afdab
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0f56fcd3a5d5..631e7c0e0a29 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1152,19 +1152,22 @@ xfs_vm_releasepage(
 	 * block_invalidatepage() can send pages that are still marked dirty
 	 * but otherwise have invalidated buffers.
 	 *
-	 * We've historically freed buffers on the latter. Instead, quietly
-	 * filter out all dirty pages to avoid spurious buffer state warnings.
-	 * This can likely be removed once shrink_active_list() is fixed.
+	 * We want to release the latter to avoid unnecessary buildup of the
+	 * LRU, skip the former and warn if we've left any lingering
+	 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
+	 * or unwritten buffers and warn if the page is not dirty. Otherwise
+	 * try to release the buffers.
 	 */
-	if (PageDirty(page))
-		return 0;
-
 	xfs_count_page_state(page, &delalloc, &unwritten);
 
-	if (WARN_ON_ONCE(delalloc))
+	if (delalloc) {
+		WARN_ON_ONCE(!PageDirty(page));
 		return 0;
-	if (WARN_ON_ONCE(unwritten))
+	}
+	if (unwritten) {
+		WARN_ON_ONCE(!PageDirty(page));
 		return 0;
+	}
 
 	return try_to_free_buffers(page);
 }

From 6ed0993a0b859ce62edf2930ded683e452286d39 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 7 Jan 2017 09:27:49 +0300
Subject: [PATCH 159/258] vfio-mdev: return -EFAULT if copy_to_user() fails

The copy_to_user() function returns the number of bytes which it wasn't
able to copy but we want to return a negative error code.

Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mtty.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 1fc57a5093a7..975af5bbf28d 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1180,7 +1180,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 
 		memcpy(&mdev_state->dev_info, &info, sizeof(info));
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
 	}
 	case VFIO_DEVICE_GET_REGION_INFO:
 	{
@@ -1201,7 +1204,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		if (ret)
 			return ret;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
 	}
 
 	case VFIO_DEVICE_GET_IRQ_INFO:
@@ -1224,7 +1230,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		if (info.count == -1)
 			return -EINVAL;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
 	}
 	case VFIO_DEVICE_SET_IRQS:
 	{

From 5c677869e0abbffbade2cfd82d46d0eebe823f34 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 7 Jan 2017 09:28:40 +0300
Subject: [PATCH 160/258] vfio-mdev: buffer overflow in ioctl()

This is a sample driver for documentation so the impact is probably
pretty low.  But we should check that bar_index is valid so we
don't write beyond the end of the mdev_state->region_info[] array.

Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mtty.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 975af5bbf28d..382f4797428f 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1073,7 +1073,7 @@ int mtty_get_region_info(struct mdev_device *mdev,
 {
 	unsigned int size = 0;
 	struct mdev_state *mdev_state;
-	int bar_index;
+	u32 bar_index;
 
 	if (!mdev)
 		return -EINVAL;
@@ -1082,8 +1082,11 @@ int mtty_get_region_info(struct mdev_device *mdev,
 	if (!mdev_state)
 		return -EINVAL;
 
-	mutex_lock(&mdev_state->ops_lock);
 	bar_index = region_info->index;
+	if (bar_index >= VFIO_PCI_NUM_REGIONS)
+		return -EINVAL;
+
+	mutex_lock(&mdev_state->ops_lock);
 
 	switch (bar_index) {
 	case VFIO_PCI_CONFIG_REGION_INDEX:

From 73da4268fdbae972f617946d1c690f2136964802 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 7 Jan 2017 09:30:08 +0300
Subject: [PATCH 161/258] vfio-mdev: remove some dead code

We set info.count to 1 in mtty_get_irq_info() so static checkers
complain that, "Why do we have impossible conditions?"  The answer is
that it seems to be left over dead code that can be safely removed.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mtty.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 382f4797428f..ca495686b9c3 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1230,9 +1230,6 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		if (ret)
 			return ret;
 
-		if (info.count == -1)
-			return -EINVAL;
-
 		if (copy_to_user((void __user *)arg, &info, minsz))
 			return -EFAULT;
 

From a89af4abdf9b353cdd6f61afc0eaaac403304873 Mon Sep 17 00:00:00 2001
From: Brendan McGrath <redmcg@redmandi.dyndns.org>
Date: Sat, 7 Jan 2017 08:01:38 +1100
Subject: [PATCH 162/258] HID: i2c-hid: Add sleep between POWER ON and RESET

Support for the Asus Touchpad was recently added. It turns out this
device can fail initialisation (and become unusable) when the RESET
command is sent too soon after the POWER ON command.

Unfortunately the i2c-hid specification does not specify the need for
a delay between these two commands. But it was discovered the Windows
driver has a 1ms delay.

As a result, this patch modifies the i2c-hid module to add a sleep
inbetween the POWER ON and RESET commands which lasts between 1ms and 5ms.

See https://github.com/vlasenko/hid-asus-dkms/issues/24 for further
details.

Signed-off-by: Brendan McGrath <redmcg@redmandi.dyndns.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/i2c-hid/i2c-hid.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 78fb32a7b103..ea3c3546cef7 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -426,6 +426,15 @@ static int i2c_hid_hwreset(struct i2c_client *client)
 	if (ret)
 		goto out_unlock;
 
+	/*
+	 * The HID over I2C specification states that if a DEVICE needs time
+	 * after the PWR_ON request, it should utilise CLOCK stretching.
+	 * However, it has been observered that the Windows driver provides a
+	 * 1ms sleep between the PWR_ON and RESET requests and that some devices
+	 * rely on this.
+	 */
+	usleep_range(1000, 5000);
+
 	i2c_hid_dbg(ihid, "resetting...\n");
 
 	ret = i2c_hid_command(client, &hid_reset_cmd, NULL, 0);

From 19c0f40d4fca3a47b8f784a627f0467f0138ccc8 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Wed, 11 Jan 2017 16:25:34 +0800
Subject: [PATCH 163/258] r8152: fix the sw rx checksum is unavailable

Fix the hw rx checksum is always enabled, and the user couldn't switch
it to sw rx checksum.

Note that the RTL_VER_01 only support sw rx checksum only. Besides,
the hw rx checksum for RTL_VER_02 is disabled after
commit b9a321b48af4 ("r8152: Fix broken RX checksums."). Re-enable it.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index be418563cb18..f3b48ad90865 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
 	u8 checksum = CHECKSUM_NONE;
 	u32 opts2, opts3;
 
-	if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02)
+	if (!(tp->netdev->features & NETIF_F_RXCSUM))
 		goto return_result;
 
 	opts2 = le32_to_cpu(rx_desc->opts2);
@@ -4356,6 +4356,11 @@ static int rtl8152_probe(struct usb_interface *intf,
 				NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
 				NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
 
+	if (tp->version == RTL_VER_01) {
+		netdev->features &= ~NETIF_F_RXCSUM;
+		netdev->hw_features &= ~NETIF_F_RXCSUM;
+	}
+
 	netdev->ethtool_ops = &ops;
 	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 

From 4ecb1d83f6abe8d49163427f4d431ebe98f8bd5f Mon Sep 17 00:00:00 2001
From: Martynas Pumputis <martynas@weave.works>
Date: Wed, 11 Jan 2017 15:18:53 +0000
Subject: [PATCH 164/258] vxlan: Set ports in flow key when doing route lookups

Otherwise, a xfrm policy with sport/dport being set cannot be matched.

Signed-off-by: Martynas Pumputis <martynas@weave.works>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index bb70dd5723b5..ca7196c40060 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1798,7 +1798,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
 				      struct vxlan_sock *sock4,
 				      struct sk_buff *skb, int oif, u8 tos,
-				      __be32 daddr, __be32 *saddr,
+				      __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport,
 				      struct dst_cache *dst_cache,
 				      const struct ip_tunnel_info *info)
 {
@@ -1824,6 +1824,8 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device
 	fl4.flowi4_proto = IPPROTO_UDP;
 	fl4.daddr = daddr;
 	fl4.saddr = *saddr;
+	fl4.fl4_dport = dport;
+	fl4.fl4_sport = sport;
 
 	rt = ip_route_output_key(vxlan->net, &fl4);
 	if (likely(!IS_ERR(rt))) {
@@ -1851,6 +1853,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 					  __be32 label,
 					  const struct in6_addr *daddr,
 					  struct in6_addr *saddr,
+					  __be16 dport, __be16 sport,
 					  struct dst_cache *dst_cache,
 					  const struct ip_tunnel_info *info)
 {
@@ -1877,6 +1880,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
 	fl6.flowi6_mark = skb->mark;
 	fl6.flowi6_proto = IPPROTO_UDP;
+	fl6.fl6_dport = dport;
+	fl6.fl6_sport = sport;
 
 	err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
 					 sock6->sock->sk,
@@ -2068,6 +2073,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				     rdst ? rdst->remote_ifindex : 0, tos,
 				     dst->sin.sin_addr.s_addr,
 				     &src->sin.sin_addr.s_addr,
+				     dst_port, src_port,
 				     dst_cache, info);
 		if (IS_ERR(rt)) {
 			err = PTR_ERR(rt);
@@ -2104,6 +2110,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 					rdst ? rdst->remote_ifindex : 0, tos,
 					label, &dst->sin6.sin6_addr,
 					&src->sin6.sin6_addr,
+					dst_port, src_port,
 					dst_cache, info);
 		if (IS_ERR(ndst)) {
 			err = PTR_ERR(ndst);
@@ -2430,7 +2437,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 
 		rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
 				     info->key.u.ipv4.dst,
-				     &info->key.u.ipv4.src, NULL, info);
+				     &info->key.u.ipv4.src, dport, sport, NULL, info);
 		if (IS_ERR(rt))
 			return PTR_ERR(rt);
 		ip_rt_put(rt);
@@ -2441,7 +2448,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 
 		ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
 					info->key.label, &info->key.u.ipv6.dst,
-					&info->key.u.ipv6.src, NULL, info);
+					&info->key.u.ipv6.src, dport, sport, NULL, info);
 		if (IS_ERR(ndst))
 			return PTR_ERR(ndst);
 		dst_release(ndst);

From 0719e72ccb801829a3d735d187ca8417f0930459 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Wed, 11 Jan 2017 09:16:32 -0800
Subject: [PATCH 165/258] netvsc: add rcu_read locking to netvsc callback

The receive callback (in tasklet context) is using RCU to get reference
to associated VF network device but this is not safe. RCU read lock
needs to be held. Found by running with full lockdep debugging
enabled.

Fixes: f207c10d9823 ("hv_netvsc: use RCU to protect vf_netdev")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c9414c054852..fcab8019dda0 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -659,6 +659,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 	 * policy filters on the host). Deliver these via the VF
 	 * interface in the guest.
 	 */
+	rcu_read_lock();
 	vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
 	if (vf_netdev && (vf_netdev->flags & IFF_UP))
 		net = vf_netdev;
@@ -667,6 +668,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 	skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
 	if (unlikely(!skb)) {
 		++net->stats.rx_dropped;
+		rcu_read_unlock();
 		return NVSP_STAT_FAIL;
 	}
 
@@ -696,6 +698,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 	 * TODO - use NAPI?
 	 */
 	netif_rx(skb);
+	rcu_read_unlock();
 
 	return 0;
 }

From 900742d89c1b4e04bd373aec8470b88e183f08ca Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:22 -0600
Subject: [PATCH 166/258] x86/unwind: Silence warnings for non-current tasks

There are a handful of callers to save_stack_trace_tsk() and
show_stack() which try to unwind the stack of a task other than current.
In such cases, it's remotely possible that the task is running on one
CPU while the unwinder is reading its stack from another CPU, causing
the unwinder to see stack corruption.

These cases seem to be mostly harmless.  The unwinder has checks which
prevent it from following bad pointers beyond the bounds of the stack.
So it's not really a bug as long as the caller understands that
unwinding another task will not always succeed.

Since stack "corruption" on another task's stack isn't necessarily a
bug, silence the warnings when unwinding tasks other than current.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/00d8c50eea3446c1524a2a755397a3966629354c.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/unwind_frame.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 4443e499f279..195eebf6da20 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -207,6 +207,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	return true;
 
 bad_address:
+	/*
+	 * When unwinding a non-current task, the task might actually be
+	 * running on another CPU, in which case it could be modifying its
+	 * stack while we're reading it.  This is generally not a problem and
+	 * can be ignored as long as the caller understands that unwinding
+	 * another task will not always succeed.
+	 */
+	if (state->task != current)
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",

From 84936118bdf37bda513d4a361c38181a216427e0 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:23 -0600
Subject: [PATCH 167/258] x86/unwind: Disable KASAN checks for non-current
 tasks

There are a handful of callers to save_stack_trace_tsk() and
show_stack() which try to unwind the stack of a task other than current.
In such cases, it's remotely possible that the task is running on one
CPU while the unwinder is reading its stack from another CPU, causing
the unwinder to see stack corruption.

These cases seem to be mostly harmless.  The unwinder has checks which
prevent it from following bad pointers beyond the bounds of the stack.
So it's not really a bug as long as the caller understands that
unwinding another task will not always succeed.

In such cases, it's possible that the unwinder may read a KASAN-poisoned
region of the stack.  Account for that by using READ_ONCE_NOCHECK() when
reading the stack of another task.

Use READ_ONCE() when reading the stack of the current task, since KASAN
warnings can still be useful for finding bugs in that case.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/4c575eb288ba9f73d498dfe0acde2f58674598f1.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/stacktrace.h |  5 ++++-
 arch/x86/kernel/unwind_frame.c    | 20 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a3269c897ec5..20ce3db20f24 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -52,13 +52,16 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 static inline unsigned long *
 get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
+	struct inactive_task_frame *frame;
+
 	if (regs)
 		return (unsigned long *)regs->bp;
 
 	if (task == current)
 		return __builtin_frame_address(0);
 
-	return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
+	frame = (struct inactive_task_frame *)task->thread.sp;
+	return (unsigned long *)READ_ONCE_NOCHECK(frame->bp);
 }
 #else
 static inline unsigned long *
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 195eebf6da20..23d15565d02a 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -6,6 +6,21 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x)			\
+({							\
+	unsigned long val;				\
+	if (task == current)				\
+		val = READ_ONCE(x);			\
+	else						\
+		val = READ_ONCE_NOCHECK(x);		\
+	val;						\
+})
+
 static void unwind_dump(struct unwind_state *state, unsigned long *sp)
 {
 	static bool dumped_before = false;
@@ -48,7 +63,8 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (state->regs && user_mode(state->regs))
 		return 0;
 
-	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+	addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
+	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr,
 				     addr_p);
 
 	return __kernel_text_address(addr) ? addr : 0;
@@ -162,7 +178,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->regs)
 		next_bp = (unsigned long *)state->regs->bp;
 	else
-		next_bp = (unsigned long *)*state->bp;
+		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp);
 
 	/* is the next frame pointer an encoded pointer to pt_regs? */
 	regs = decode_frame_pointer(next_bp);

From 2c96b2fe9c57b4267c3f0a680d82d7cc52e1c447 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:24 -0600
Subject: [PATCH 168/258] x86/unwind: Include __schedule() in stack traces

In the following commit:

  0100301bfdf5 ("sched/x86: Rewrite the switch_to() code")

... the layout of the 'inactive_task_frame' struct was designed to have
a frame pointer header embedded in it, so that the unwinder could use
the 'bp' and 'ret_addr' fields to report __schedule() on the stack (or
ret_from_fork() for newly forked tasks which haven't actually run yet).

Finish the job by changing get_frame_pointer() to return a pointer to
inactive_task_frame's 'bp' field rather than 'bp' itself.  This allows
the unwinder to start one frame higher on the stack, so that it properly
reports __schedule().

Reported-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/598e9f7505ed0aba86e8b9590aa528c6c7ae8dcd.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/stacktrace.h |  5 +----
 arch/x86/include/asm/switch_to.h  | 10 +++++++++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 20ce3db20f24..2e41c50ddf47 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -52,16 +52,13 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 static inline unsigned long *
 get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
-	struct inactive_task_frame *frame;
-
 	if (regs)
 		return (unsigned long *)regs->bp;
 
 	if (task == current)
 		return __builtin_frame_address(0);
 
-	frame = (struct inactive_task_frame *)task->thread.sp;
-	return (unsigned long *)READ_ONCE_NOCHECK(frame->bp);
+	return &((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
 static inline unsigned long *
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 5cb436acd463..fcc5cd387fd1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -36,7 +36,10 @@ static inline void prepare_switch_to(struct task_struct *prev,
 
 asmlinkage void ret_from_fork(void);
 
-/* data that is pointed to by thread.sp */
+/*
+ * This is the structure pointed to by thread.sp for an inactive task.  The
+ * order of the fields must match the code in __switch_to_asm().
+ */
 struct inactive_task_frame {
 #ifdef CONFIG_X86_64
 	unsigned long r15;
@@ -48,6 +51,11 @@ struct inactive_task_frame {
 	unsigned long di;
 #endif
 	unsigned long bx;
+
+	/*
+	 * These two fields must be together.  They form a stack frame header,
+	 * needed by get_frame_pointer().
+	 */
 	unsigned long bp;
 	unsigned long ret_addr;
 };

From ff3f7e2475bbf9201e95824e72698fcdc5c3d47a Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:25 -0600
Subject: [PATCH 169/258] x86/entry: Fix the end of the stack for newly forked
 tasks

When unwinding a task, the end of the stack is always at the same offset
right below the saved pt_regs, regardless of which syscall was used to
enter the kernel.  That convention allows the unwinder to verify that a
stack is sane.

However, newly forked tasks don't always follow that convention, as
reported by the following unwinder warning seen by Dave Jones:

  WARNING: kernel stack frame pointer at ffffc90001443f30 in kworker/u8:8:30468 has bad value           (null)

The warning was due to the following call chain:

  (ftrace handler)
  call_usermodehelper_exec_async+0x5/0x140
  ret_from_fork+0x22/0x30

The problem is that ret_from_fork() doesn't create a stack frame before
calling other functions.  Fix that by carefully using the frame pointer
macros.

In addition to conforming to the end of stack convention, this also
makes related stack traces more sensible by making it clear to the user
that ret_from_fork() was involved.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/8854cdaab980e9700a81e9ebf0d4238e4bbb68ef.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/entry_32.S | 30 +++++++++++-------------------
 arch/x86/entry/entry_64.S | 11 +++++++----
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 701d29f8e4d3..57f7ec35216e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -254,23 +254,6 @@ ENTRY(__switch_to_asm)
 	jmp	__switch_to
 END(__switch_to_asm)
 
-/*
- * The unwinder expects the last frame on the stack to always be at the same
- * offset from the end of the page, which allows it to validate the stack.
- * Calling schedule_tail() directly would break that convention because its an
- * asmlinkage function so its argument has to be pushed on the stack.  This
- * wrapper creates a proper "end of stack" frame header before the call.
- */
-ENTRY(schedule_tail_wrapper)
-	FRAME_BEGIN
-
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
-
-	FRAME_END
-	ret
-ENDPROC(schedule_tail_wrapper)
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -279,15 +262,24 @@ ENDPROC(schedule_tail_wrapper)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	call	schedule_tail_wrapper
+	FRAME_BEGIN		/* help unwinder find end of stack */
+
+	/*
+	 * schedule_tail() is asmlinkage so we have to put its 'prev' argument
+	 * on the stack.
+	 */
+	pushl	%eax
+	call	schedule_tail
+	popl	%eax
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
 
 2:
 	/* When we fork, we trace the syscall return in the child, too. */
-	movl    %esp, %eax
+	leal	FRAME_OFFSET(%esp), %eax
 	call    syscall_return_slowpath
+	FRAME_END
 	jmp     restore_all
 
 	/* kernel thread */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5b219707c2f2..044d18ebc43c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -408,17 +409,19 @@ END(__switch_to_asm)
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
+	FRAME_BEGIN			/* help unwinder find end of stack */
 	movq	%rax, %rdi
-	call	schedule_tail			/* rdi: 'prev' task parameter */
+	call	schedule_tail		/* rdi: 'prev' task parameter */
 
-	testq	%rbx, %rbx			/* from kernel_thread? */
-	jnz	1f				/* kernel threads are uncommon */
+	testq	%rbx, %rbx		/* from kernel_thread? */
+	jnz	1f			/* kernel threads are uncommon */
 
 2:
-	movq	%rsp, %rdi
+	leaq	FRAME_OFFSET(%rsp),%rdi	/* pt_regs pointer */
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
+	FRAME_END
 	jmp	restore_regs_and_iret
 
 1:

From e1d070c3793a2766122865a7c2142853b48808c5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 21 Dec 2016 00:19:19 +0100
Subject: [PATCH 170/258] mmc: sdhci-acpi: Only powered up enabled acpi child
 devices

Commit e5bbf30733f9 ("mmc: sdhci-acpi: Ensure connected devices are
powered when probing") introduced code to powerup any acpi child
nodes listed in the dstd. But some dstd-s list all possible devices
used on some board variants, while reporting if the device is actually
present and enabled in the status field of the device.

So we end up calling the acpi _PS0 (power-on) method for devices which
are not actually present. This does not always end well, e.g. on my
cube iwork8 air tablet, this results in freezing the entire tablet as
soon as the r8723bs module is loaded.

This commit fixes this by checking the child device's status.present
and status.enabled bits and only call acpi_device_fix_up_power()
if both are set.

Fixes: e5bbf30733f9 ("mmc: sdhci-acpi: Ensure connected devices are powered when probing")
BugLink: https://github.com/hadess/rtl8723bs/issues/80
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-acpi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 160f695cc09c..278a5a435ab7 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -395,7 +395,8 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
 	/* Power on the SDHCI controller and its children */
 	acpi_device_fix_up_power(device);
 	list_for_each_entry(child, &device->children, node)
-		acpi_device_fix_up_power(child);
+		if (child->status.present && child->status.enabled)
+			acpi_device_fix_up_power(child);
 
 	if (acpi_bus_get_status(device) || !device->status.present)
 		return -ENODEV;

From 01167c7b9cbf099c69fe411a228e4e9c7104e123 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Thu, 5 Jan 2017 19:24:04 +0000
Subject: [PATCH 171/258] mmc: mxs-mmc: Fix additional cycles after
 transmission stop

According to the code the intention is to append 8 SCK cycles
instead of 4 at end of a MMC_STOP_TRANSMISSION command. But this
will never happened because it's an AC command not an ADTC command.
So fix this by moving the statement into the right function.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: e4243f13d10e (mmc: mxs-mmc: add mmc host driver for i.MX23/28)
Cc: <stable@vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mxs-mmc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 44ecebd1ea8c..c8b8ac66ff7e 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -309,6 +309,9 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host)
 	cmd0 = BF_SSP(cmd->opcode, CMD0_CMD);
 	cmd1 = cmd->arg;
 
+	if (cmd->opcode == MMC_STOP_TRANSMISSION)
+		cmd0 |= BM_SSP_CMD0_APPEND_8CYC;
+
 	if (host->sdio_irq_en) {
 		ctrl0 |= BM_SSP_CTRL0_SDIO_IRQ_CHECK;
 		cmd0 |= BM_SSP_CMD0_CONT_CLKING_EN | BM_SSP_CMD0_SLOW_CLKING_EN;
@@ -417,8 +420,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host)
 		       ssp->base + HW_SSP_BLOCK_SIZE);
 	}
 
-	if ((cmd->opcode == MMC_STOP_TRANSMISSION) ||
-	    (cmd->opcode == SD_IO_RW_EXTENDED))
+	if (cmd->opcode == SD_IO_RW_EXTENDED)
 		cmd0 |= BM_SSP_CMD0_APPEND_8CYC;
 
 	cmd1 = cmd->arg;

From b6416e61012429e0277bd15a229222fd17afc1c1 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 16 Dec 2016 14:30:35 -0800
Subject: [PATCH 172/258] jump_labels: API for flushing deferred jump label
 updates

Modules that use static_key_deferred need a way to synchronize with
any delayed work that is still pending when the module is unloaded.
Introduce static_key_deferred_flush() which flushes any pending
jump label updates.

Signed-off-by: David Matlack <dmatlack@google.com>
Cc: stable@vger.kernel.org
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/jump_label_ratelimit.h | 5 +++++
 kernel/jump_label.c                  | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
index 089f70f83e97..23da3af459fe 100644
--- a/include/linux/jump_label_ratelimit.h
+++ b/include/linux/jump_label_ratelimit.h
@@ -14,6 +14,7 @@ struct static_key_deferred {
 
 #ifdef HAVE_JUMP_LABEL
 extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
+extern void static_key_deferred_flush(struct static_key_deferred *key);
 extern void
 jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
 
@@ -26,6 +27,10 @@ static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
 	STATIC_KEY_CHECK_USE();
 	static_key_slow_dec(&key->key);
 }
+static inline void static_key_deferred_flush(struct static_key_deferred *key)
+{
+	STATIC_KEY_CHECK_USE();
+}
 static inline void
 jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 93ad6c1fb9b6..a9b8cf500591 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -182,6 +182,13 @@ void static_key_slow_dec_deferred(struct static_key_deferred *key)
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
 
+void static_key_deferred_flush(struct static_key_deferred *key)
+{
+	STATIC_KEY_CHECK_USE();
+	flush_delayed_work(&key->work);
+}
+EXPORT_SYMBOL_GPL(static_key_deferred_flush);
+
 void jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
 {

From cef84c302fe051744b983a92764d3fcca933415d Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 16 Dec 2016 14:30:36 -0800
Subject: [PATCH 173/258] KVM: x86: flush pending lapic jump label updates on
 module unload

KVM's lapic emulation uses static_key_deferred (apic_{hw,sw}_disabled).
These are implemented with delayed_work structs which can still be
pending when the KVM module is unloaded. We've seen this cause kernel
panics when the kvm_intel module is quickly reloaded.

Use the new static_key_deferred_flush() API to flush pending updates on
module unload.

Signed-off-by: David Matlack <dmatlack@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 6 ++++++
 arch/x86/kvm/lapic.h | 1 +
 arch/x86/kvm/x86.c   | 1 +
 3 files changed, 8 insertions(+)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5fe290c1b7d8..2f6ef5121a4c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2426,3 +2426,9 @@ void kvm_lapic_init(void)
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
 	jump_label_rate_limit(&apic_sw_disabled, HZ);
 }
+
+void kvm_lapic_exit(void)
+{
+	static_key_deferred_flush(&apic_hw_disabled);
+	static_key_deferred_flush(&apic_sw_disabled);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e0c80233b3e1..ff8039d61672 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -110,6 +110,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
 void kvm_lapic_init(void);
+void kvm_lapic_exit(void);
 
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2f22810a7e0c..a0ac6e0060fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6045,6 +6045,7 @@ out:
 
 void kvm_arch_exit(void)
 {
+	kvm_lapic_exit();
 	perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
 
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))

From 129a72a0d3c8e139a04512325384fe5ac119e74d Mon Sep 17 00:00:00 2001
From: Steve Rutherford <srutherford@google.com>
Date: Wed, 11 Jan 2017 18:28:29 -0800
Subject: [PATCH 174/258] KVM: x86: Introduce segmented_write_std

Introduces segemented_write_std.

Switches from emulated reads/writes to standard read/writes in fxsave,
fxrstor, sgdt, and sidt.  This fixes CVE-2017-2584, a longstanding
kernel memory leak.

Since commit 283c95d0e389 ("KVM: x86: emulate FXSAVE and FXRSTOR",
2016-11-09), which is luckily not yet in any final release, this would
also be an exploitable kernel memory *write*!

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: stable@vger.kernel.org
Fixes: 96051572c819194c37a8367624b285be10297eca
Fixes: 283c95d0e3891b64087706b344a4b545d04a6e62
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Steve Rutherford <srutherford@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56628a44668b..f36d0fa6b885 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -818,6 +818,20 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
 }
 
+static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
+			       struct segmented_address addr,
+			       void *data,
+			       unsigned int size)
+{
+	int rc;
+	ulong linear;
+
+	rc = linearize(ctxt, addr, size, true, &linear);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
 /*
  * Prefetch the remaining bytes of the instruction without crossing page
  * boundary if they are not in fetch_cache yet.
@@ -3685,8 +3699,8 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
 	}
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
-	return segmented_write(ctxt, ctxt->dst.addr.mem,
-			       &desc_ptr, 2 + ctxt->op_bytes);
+	return segmented_write_std(ctxt, ctxt->dst.addr.mem,
+				   &desc_ptr, 2 + ctxt->op_bytes);
 }
 
 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
@@ -3932,7 +3946,7 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
 	else
 		size = offsetof(struct fxregs_state, xmm_space[0]);
 
-	return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+	return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
 }
 
 static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
@@ -3974,7 +3988,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
+	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 

From 4f3dbdf47e150016aacd734e663347fcaa768303 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 5 Jan 2017 17:39:42 -0800
Subject: [PATCH 175/258] KVM: eventfd: fix NULL deref irqbypass consumer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported syzkaller:

    BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
    IP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
    PGD 0

    Oops: 0002 [#1] SMP
    CPU: 1 PID: 125 Comm: kworker/1:1 Not tainted 4.9.0+ #1
    Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm]
    task: ffff9bbe0dfbb900 task.stack: ffffb61802014000
    RIP: 0010:irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
    Call Trace:
     irqfd_shutdown+0x66/0xa0 [kvm]
     process_one_work+0x16b/0x480
     worker_thread+0x4b/0x500
     kthread+0x101/0x140
     ? process_one_work+0x480/0x480
     ? kthread_create_on_node+0x60/0x60
     ret_from_fork+0x25/0x30
    RIP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] RSP: ffffb61802017e20
    CR2: 0000000000000008

The syzkaller folks reported a NULL pointer dereference that due to
unregister an consumer which fails registration before. The syzkaller
creates two VMs w/ an equal eventfd occasionally. So the second VM
fails to register an irqbypass consumer. It will make irqfd as inactive
and queue an workqueue work to shutdown irqfd and unregister the irqbypass
consumer when eventfd is closed. However, the second consumer has been
initialized though it fails registration. So the token(same as the first
VM's) is taken to unregister the consumer through the workqueue, the
consumer of the first VM is found and unregistered, then NULL deref incurred
in the path of deleting consumer from the consumers list.

This patch fixes it by making irq_bypass_register/unregister_consumer()
looks for the consumer entry based on consumer pointer itself instead of
token matching.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/lib/irqbypass.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 52abac4bb6a2..6d2fcd6fcb25 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
 	mutex_lock(&lock);
 
 	list_for_each_entry(tmp, &consumers, node) {
-		if (tmp->token == consumer->token) {
+		if (tmp->token == consumer->token || tmp == consumer) {
 			mutex_unlock(&lock);
 			module_put(THIS_MODULE);
 			return -EBUSY;
@@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
 	mutex_lock(&lock);
 
 	list_for_each_entry(tmp, &consumers, node) {
-		if (tmp->token != consumer->token)
+		if (tmp != consumer)
 			continue;
 
 		list_for_each_entry(producer, &producers, node) {

From 546d87e5c903a7f3ee7b9f998949a94729fbc65b Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Tue, 3 Jan 2017 18:56:19 -0800
Subject: [PATCH 176/258] KVM: x86: fix NULL deref in vcpu_scan_ioapic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by syzkaller:

    BUG: unable to handle kernel NULL pointer dereference at 00000000000001b0
    IP: _raw_spin_lock+0xc/0x30
    PGD 3e28eb067
    PUD 3f0ac6067
    PMD 0
    Oops: 0002 [#1] SMP
    CPU: 0 PID: 2431 Comm: test Tainted: G           OE   4.10.0-rc1+ #3
    Call Trace:
     ? kvm_ioapic_scan_entry+0x3e/0x110 [kvm]
     kvm_arch_vcpu_ioctl_run+0x10a8/0x15f0 [kvm]
     ? pick_next_task_fair+0xe1/0x4e0
     ? kvm_arch_vcpu_load+0xea/0x260 [kvm]
     kvm_vcpu_ioctl+0x33a/0x600 [kvm]
     ? hrtimer_try_to_cancel+0x29/0x130
     ? do_nanosleep+0x97/0xf0
     do_vfs_ioctl+0xa1/0x5d0
     ? __hrtimer_init+0x90/0x90
     ? do_nanosleep+0x5b/0xf0
     SyS_ioctl+0x79/0x90
     do_syscall_64+0x6e/0x180
     entry_SYSCALL64_slow_path+0x25/0x25
    RIP: _raw_spin_lock+0xc/0x30 RSP: ffffa43688973cc0

The syzkaller folks reported a NULL pointer dereference due to
ENABLE_CAP succeeding even without an irqchip.  The Hyper-V
synthetic interrupt controller is activated, resulting in a
wrong request to rescan the ioapic and a NULL pointer dereference.

    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <sys/types.h>
    #include <linux/kvm.h>
    #include <pthread.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    #ifndef KVM_CAP_HYPERV_SYNIC
    #define KVM_CAP_HYPERV_SYNIC 123
    #endif

    void* thr(void* arg)
    {
	struct kvm_enable_cap cap;
	cap.flags = 0;
	cap.cap = KVM_CAP_HYPERV_SYNIC;
	ioctl((long)arg, KVM_ENABLE_CAP, &cap);
	return 0;
    }

    int main()
    {
	void *host_mem = mmap(0, 0x1000, PROT_READ|PROT_WRITE,
			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	int kvmfd = open("/dev/kvm", 0);
	int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
	struct kvm_userspace_memory_region memreg;
	memreg.slot = 0;
	memreg.flags = 0;
	memreg.guest_phys_addr = 0;
	memreg.memory_size = 0x1000;
	memreg.userspace_addr = (unsigned long)host_mem;
	host_mem[0] = 0xf4;
	ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
	int cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
	struct kvm_sregs sregs;
	ioctl(cpufd, KVM_GET_SREGS, &sregs);
	sregs.cr0 = 0;
	sregs.cr4 = 0;
	sregs.efer = 0;
	sregs.cs.selector = 0;
	sregs.cs.base = 0;
	ioctl(cpufd, KVM_SET_SREGS, &sregs);
	struct kvm_regs regs = { .rflags = 2 };
	ioctl(cpufd, KVM_SET_REGS, &regs);
	ioctl(vmfd, KVM_CREATE_IRQCHIP, 0);
	pthread_t th;
	pthread_create(&th, 0, thr, (void*)(long)cpufd);
	usleep(rand() % 10000);
	ioctl(cpufd, KVM_RUN, 0);
	pthread_join(th, 0);
	return 0;
    }

This patch fixes it by failing ENABLE_CAP if without an irqchip.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Fixes: 5c919412fe61 (kvm/x86: Hyper-V synthetic interrupt controller)
Cc: stable@vger.kernel.org # 4.5+
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0ac6e0060fb..57d8a856cdc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3342,6 +3342,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 	switch (cap->cap) {
 	case KVM_CAP_HYPERV_SYNIC:
+		if (!irqchip_in_kernel(vcpu->kvm))
+			return -EINVAL;
 		return kvm_hv_activate_synic(vcpu);
 	default:
 		return -EINVAL;

From 19c816e8e455f58da9997e4c6626f06203d8fbf0 Mon Sep 17 00:00:00 2001
From: Jike Song <jike.song@intel.com>
Date: Thu, 12 Jan 2017 16:52:02 +0800
Subject: [PATCH 177/258] capability: export has_capability

has_capability() is sometimes needed by modules to test capability
for specified task other than current, so export it.

Cc: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Jike Song <jike.song@intel.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 kernel/capability.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/capability.c b/kernel/capability.c
index a98e814f216f..f97fe77ceb88 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -318,6 +318,7 @@ bool has_capability(struct task_struct *t, int cap)
 {
 	return has_ns_capability(t, &init_user_ns, cap);
 }
+EXPORT_SYMBOL(has_capability);
 
 /**
  * has_ns_capability_noaudit - Does a task have a capability (unaudited)

From 33ab91103b3415e12457e3104f0e4517ce12d0f3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 12 Jan 2017 15:02:32 +0100
Subject: [PATCH 178/258] KVM: x86: fix emulation of "MOV SS, null selector"

This is CVE-2017-2583.  On Intel this causes a failed vmentry because
SS's type is neither 3 nor 7 (even though the manual says this check is
only done for usable SS, and the dmesg splat says that SS is unusable!).
On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb.

The fix fabricates a data segment descriptor when SS is set to a null
selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb.
Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3;
this in turn ensures CPL < 3 because RPL must be equal to CPL.

Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing
the bug and deciphering the manuals.

Reported-by: Xiaohan Zhang <zhangxiaohan1@huawei.com>
Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011
Cc: stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f36d0fa6b885..cedbba0f3402 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1585,7 +1585,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				    &ctxt->exception);
 }
 
-/* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				     u16 selector, int seg, u8 cpl,
 				     enum x86_transfer_type transfer,
@@ -1622,20 +1621,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 	rpl = selector & 3;
 
-	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
-	if ((seg == VCPU_SREG_CS
-	     || (seg == VCPU_SREG_SS
-		 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
-	     || seg == VCPU_SREG_TR)
-	    && null_selector)
-		goto exception;
-
 	/* TR should be in GDT only */
 	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
 		goto exception;
 
-	if (null_selector) /* for NULL selector skip all following checks */
+	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
+	if (null_selector) {
+		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
+			goto exception;
+
+		if (seg == VCPU_SREG_SS) {
+			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
+				goto exception;
+
+			/*
+			 * ctxt->ops->set_segment expects the CPL to be in
+			 * SS.DPL, so fake an expand-up 32-bit data segment.
+			 */
+			seg_desc.type = 3;
+			seg_desc.p = 1;
+			seg_desc.s = 1;
+			seg_desc.dpl = cpl;
+			seg_desc.d = 1;
+			seg_desc.g = 1;
+		}
+
+		/* Skip all following checks */
 		goto load;
+	}
 
 	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
 	if (ret != X86EMUL_CONTINUE)
@@ -1751,6 +1764,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/*
+	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
+	 * they can load it at CPL<3 (Intel's manual says only LSS can,
+	 * but it's wrong).
+	 *
+	 * However, the Intel manual says that putting IST=1/DPL=3 in
+	 * an interrupt gate will result in SS=3 (the AMD manual instead
+	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
+	 * and only forbid it here.
+	 */
+	if (seg == VCPU_SREG_SS && selector == 3 &&
+	    ctxt->mode == X86EMUL_MODE_PROT64)
+		return emulate_exception(ctxt, GP_VECTOR, 0, true);
+
 	return __load_segment_descriptor(ctxt, selector, seg, cpl,
 					 X86_TRANSFER_NONE, NULL);
 }

From 36bf38d158d3482119b3e159c0619b3c1539b508 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 12 Jan 2017 09:10:37 +0100
Subject: [PATCH 179/258] mlxsw: spectrum: Fix memory leak at skb reallocation

During transmission the skb is checked for headroom in order to
add vendor specific header. In case the skb needs to be re-allocated,
skb_realloc_headroom() is called to make a private copy of the original,
but doesn't release it. Current code assumes that the original skb is
released during reallocation and only releases it at the error path
which causes a memory leak.

Fix this by adding the original skb release to the main path.

Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC")
Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d768c7b6c6d6..003093abb170 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -684,6 +684,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 			dev_kfree_skb_any(skb_orig);
 			return NETDEV_TX_OK;
 		}
+		dev_consume_skb_any(skb_orig);
 	}
 
 	if (eth_skb_pad(skb)) {

From 400fc0106dd8c27ed84781c929c1a184785b9c79 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 12 Jan 2017 09:10:38 +0100
Subject: [PATCH 180/258] mlxsw: switchx2: Fix memory leak at skb reallocation

During transmission the skb is checked for headroom in order to
add vendor specific header. In case the skb needs to be re-allocated,
skb_realloc_headroom() is called to make a private copy of the original,
but doesn't release it. Current code assumes that the original skb is
released during reallocation and only releases it at the error path
which causes a memory leak.

Fix this by adding the original skb release to the main path.

Fixes: d003462a50de ("mlxsw: Simplify mlxsw_sx_port_xmit function")
Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 150ccf5192a9..2e88115e8735 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -345,6 +345,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
 			dev_kfree_skb_any(skb_orig);
 			return NETDEV_TX_OK;
 		}
+		dev_consume_skb_any(skb_orig);
 	}
 	mlxsw_sx_txhdr_construct(skb, &tx_info);
 	/* TX header is consumed by HW on the way so we shouldn't count its

From 28e46a0f2e03ab4ed0e23cace1ea89a68c8c115b Mon Sep 17 00:00:00 2001
From: Elad Raz <eladr@mellanox.com>
Date: Thu, 12 Jan 2017 09:10:39 +0100
Subject: [PATCH 181/258] mlxsw: pci: Fix EQE structure definition

The event_data starts from address 0x00-0x0C and not from 0x08-0x014. This
leads to duplication with other fields in the Event Queue Element such as
sub-type, cqn and owner.

Fixes: eda6500a987a0 ("mlxsw: Add PCI bus implementation")
Signed-off-by: Elad Raz <eladr@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index d147ddd97997..0af3338bfcb4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -209,21 +209,21 @@ MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1);
 /* pci_eqe_cmd_token
  * Command completion event - token
  */
-MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16);
+MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16);
 
 /* pci_eqe_cmd_status
  * Command completion event - status
  */
-MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8);
+MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8);
 
 /* pci_eqe_cmd_out_param_h
  * Command completion event - output parameter - higher part
  */
-MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32);
+MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32);
 
 /* pci_eqe_cmd_out_param_l
  * Command completion event - output parameter - lower part
  */
-MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32);
+MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32);
 
 #endif

From f99e86485cc32cd16e5cc97f9bb0474f28608d84 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 12 Jan 2017 07:58:32 -0700
Subject: [PATCH 182/258] block: Rename blk_queue_zone_size and bdev_zone_size

All block device data fields and functions returning a number of 512B
sectors are by convention named xxx_sectors while names in the form
xxx_size are generally used for a number of bytes. The blk_queue_zone_size
and bdev_zone_size functions were not following this convention so rename
them.

No functional change is introduced by this patch.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>

Collapsed the two patches, they were nonsensically split and broke
bisection.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-zoned.c         |  4 ++--
 block/partition-generic.c | 14 +++++++-------
 fs/f2fs/segment.c         |  4 ++--
 fs/f2fs/super.c           |  6 +++---
 include/linux/blkdev.h    |  6 +++---
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 472211fa183a..3bd15d8095b1 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -16,7 +16,7 @@
 static inline sector_t blk_zone_start(struct request_queue *q,
 				      sector_t sector)
 {
-	sector_t zone_mask = blk_queue_zone_size(q) - 1;
+	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
 
 	return sector & ~zone_mask;
 }
@@ -222,7 +222,7 @@ int blkdev_reset_zones(struct block_device *bdev,
 		return -EINVAL;
 
 	/* Check alignment (handle eventual smaller last zone) */
-	zone_sectors = blk_queue_zone_size(q);
+	zone_sectors = blk_queue_zone_sectors(q);
 	if (sector & (zone_sectors - 1))
 		return -EINVAL;
 
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d7beb6bbbf66..7afb9907821f 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -434,7 +434,7 @@ static bool part_zone_aligned(struct gendisk *disk,
 			      struct block_device *bdev,
 			      sector_t from, sector_t size)
 {
-	unsigned int zone_size = bdev_zone_size(bdev);
+	unsigned int zone_sectors = bdev_zone_sectors(bdev);
 
 	/*
 	 * If this function is called, then the disk is a zoned block device
@@ -446,7 +446,7 @@ static bool part_zone_aligned(struct gendisk *disk,
 	 * regular block devices (no zone operation) and their zone size will
 	 * be reported as 0. Allow this case.
 	 */
-	if (!zone_size)
+	if (!zone_sectors)
 		return true;
 
 	/*
@@ -455,24 +455,24 @@ static bool part_zone_aligned(struct gendisk *disk,
 	 * use it. Check the zone size too: it should be a power of 2 number
 	 * of sectors.
 	 */
-	if (WARN_ON_ONCE(!is_power_of_2(zone_size))) {
+	if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
 		u32 rem;
 
-		div_u64_rem(from, zone_size, &rem);
+		div_u64_rem(from, zone_sectors, &rem);
 		if (rem)
 			return false;
 		if ((from + size) < get_capacity(disk)) {
-			div_u64_rem(size, zone_size, &rem);
+			div_u64_rem(size, zone_sectors, &rem);
 			if (rem)
 				return false;
 		}
 
 	} else {
 
-		if (from & (zone_size - 1))
+		if (from & (zone_sectors - 1))
 			return false;
 		if ((from + size) < get_capacity(disk) &&
-		    (size & (zone_size - 1)))
+		    (size & (zone_sectors - 1)))
 			return false;
 
 	}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0738f48293cc..0d8802453758 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -713,8 +713,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
 	}
 	sector = SECTOR_FROM_BLOCK(blkstart);
 
-	if (sector & (bdev_zone_size(bdev) - 1) ||
-				nr_sects != bdev_zone_size(bdev)) {
+	if (sector & (bdev_zone_sectors(bdev) - 1) ||
+	    nr_sects != bdev_zone_sectors(bdev)) {
 		f2fs_msg(sbi->sb, KERN_INFO,
 			"(%d) %s: Unaligned discard attempted (block %x + %x)",
 			devi, sbi->s_ndevs ? FDEV(devi).path: "",
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 702638e21c76..46fd30d8af77 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1553,16 +1553,16 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
 		return 0;
 
 	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
-				SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
+				SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
 		return -EINVAL;
-	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
 	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
 				__ilog2_u32(sbi->blocks_per_blkz))
 		return -EINVAL;
 	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
 	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
 					sbi->log_blocks_per_blkz;
-	if (nr_sectors & (bdev_zone_size(bdev) - 1))
+	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
 		FDEV(devi).nr_blkz++;
 
 	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 83695641bd5e..ff3d774f2751 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -739,7 +739,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 	}
 }
 
-static inline unsigned int blk_queue_zone_size(struct request_queue *q)
+static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 {
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
@@ -1536,12 +1536,12 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
 	return false;
 }
 
-static inline unsigned int bdev_zone_size(struct block_device *bdev)
+static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 
 	if (q)
-		return blk_queue_zone_size(q);
+		return blk_queue_zone_sectors(q);
 
 	return 0;
 }

From 18a3ed59d09cf81a6447aadf6931bf0c9ffec5e0 Mon Sep 17 00:00:00 2001
From: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
Date: Thu, 12 Jan 2017 13:21:06 +0100
Subject: [PATCH 183/258] ravb: Remove Rx overflow log messages

Remove Rx overflow log messages as in an environment where logging results
in network traffic logging may cause further overflows.

Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
Signed-off-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
[simon: reworked changelog]
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 92d7692c840d..5e5ad978eab9 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -926,14 +926,10 @@ static int ravb_poll(struct napi_struct *napi, int budget)
 	/* Receive error message handling */
 	priv->rx_over_errors =  priv->stats[RAVB_BE].rx_over_errors;
 	priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
-	if (priv->rx_over_errors != ndev->stats.rx_over_errors) {
+	if (priv->rx_over_errors != ndev->stats.rx_over_errors)
 		ndev->stats.rx_over_errors = priv->rx_over_errors;
-		netif_err(priv, rx_err, ndev, "Receive Descriptor Empty\n");
-	}
-	if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) {
+	if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
 		ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
-		netif_err(priv, rx_err, ndev, "Receive FIFO Overflow\n");
-	}
 out:
 	return budget - quota;
 }

From 4b09ec4b14a168bf2c687e1f598140c3c11e9222 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Thu, 5 Jan 2017 10:20:16 -0500
Subject: [PATCH 184/258] nfs: Don't take a reference on fl->fl_file for LOCK
 operation

I have reports of a crash that look like __fput() was called twice for
a NFSv4.0 file.  It seems possible that the state manager could try to
reclaim a lock and take a reference on the fl->fl_file at the same time the
file is being released if, during the close(), a signal interrupts the wait
for outstanding IO while removing locks which then skips the removal
of that lock.

Since 83bfff23e9ed ("nfs4: have do_vfs_lock take an inode pointer") has
removed the need to traverse fl->fl_file->f_inode in nfs4_lock_done(),
taking that reference is no longer necessary.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6dcbc5defb7a..700ed1fc1075 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -38,7 +38,6 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
-#include <linux/file.h>
 #include <linux/string.h>
 #include <linux/ratelimit.h>
 #include <linux/printk.h>
@@ -6127,7 +6126,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	p->server = server;
 	atomic_inc(&lsp->ls_count);
 	p->ctx = get_nfs_open_context(ctx);
-	get_file(fl->fl_file);
 	memcpy(&p->fl, fl, sizeof(p->fl));
 	return p;
 out_free_seqid:
@@ -6240,7 +6238,6 @@ static void nfs4_lock_release(void *calldata)
 		nfs_free_seqid(data->arg.lock_seqid);
 	nfs4_put_lock_state(data->lsp);
 	put_nfs_open_context(data->ctx);
-	fput(data->fl.fl_file);
 	kfree(data);
 	dprintk("%s: done!\n", __func__);
 }

From 41c066f2c4d436c535616fe182331766c57838f0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 11 Jan 2017 14:54:53 +0000
Subject: [PATCH 185/258] arm64: assembler: make adr_l work in modules under
 KASLR

When CONFIG_RANDOMIZE_MODULE_REGION_FULL=y, the offset between loaded
modules and the core kernel may exceed 4 GB, putting symbols exported
by the core kernel out of the reach of the ordinary adrp/add instruction
pairs used to generate relative symbol references. So make the adr_l
macro emit a movz/movk sequence instead when executing in module context.

While at it, remove the pointless special case for the stack pointer.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/assembler.h | 36 ++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 446f6c46d4b1..3a4301163e04 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -164,22 +164,25 @@ lr	.req	x30		// link register
 
 /*
  * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
- * <symbol> is within the range +/- 4 GB of the PC.
+ * <symbol> is within the range +/- 4 GB of the PC when running
+ * in core kernel context. In module context, a movz/movk sequence
+ * is used, since modules may be loaded far away from the kernel
+ * when KASLR is in effect.
  */
 	/*
 	 * @dst: destination register (64 bit wide)
 	 * @sym: name of the symbol
-	 * @tmp: optional scratch register to be used if <dst> == sp, which
-	 *       is not allowed in an adrp instruction
 	 */
-	.macro	adr_l, dst, sym, tmp=
-	.ifb	\tmp
+	.macro	adr_l, dst, sym
+#ifndef MODULE
 	adrp	\dst, \sym
 	add	\dst, \dst, :lo12:\sym
-	.else
-	adrp	\tmp, \sym
-	add	\dst, \tmp, :lo12:\sym
-	.endif
+#else
+	movz	\dst, #:abs_g3:\sym
+	movk	\dst, #:abs_g2_nc:\sym
+	movk	\dst, #:abs_g1_nc:\sym
+	movk	\dst, #:abs_g0_nc:\sym
+#endif
 	.endm
 
 	/*
@@ -190,6 +193,7 @@ lr	.req	x30		// link register
 	 *       the address
 	 */
 	.macro	ldr_l, dst, sym, tmp=
+#ifndef MODULE
 	.ifb	\tmp
 	adrp	\dst, \sym
 	ldr	\dst, [\dst, :lo12:\sym]
@@ -197,6 +201,15 @@ lr	.req	x30		// link register
 	adrp	\tmp, \sym
 	ldr	\dst, [\tmp, :lo12:\sym]
 	.endif
+#else
+	.ifb	\tmp
+	adr_l	\dst, \sym
+	ldr	\dst, [\dst]
+	.else
+	adr_l	\tmp, \sym
+	ldr	\dst, [\tmp]
+	.endif
+#endif
 	.endm
 
 	/*
@@ -206,8 +219,13 @@ lr	.req	x30		// link register
 	 *       while <src> needs to be preserved.
 	 */
 	.macro	str_l, src, sym, tmp
+#ifndef MODULE
 	adrp	\tmp, \sym
 	str	\src, [\tmp, :lo12:\sym]
+#else
+	adr_l	\tmp, \sym
+	str	\src, [\tmp]
+#endif
 	.endm
 
 	/*

From cc8e8342930129aa2c9b629e1653e4681f0896ea Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Wed, 4 Jan 2017 16:21:58 +0800
Subject: [PATCH 186/258] ceph: fix mds cluster availability check

We should apply the check after getting the initial mdsmap.

Fixes: e9e427f0a14f ("ceph: check availability of mds cluster on mount")
Link: http://tracker.ceph.com/issues/18161
Signed-off-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/mds_client.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4f49253387a0..ec6b35e9f966 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2106,6 +2106,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
 			dout("do_request mdsmap err %d\n", err);
 			goto finish;
 		}
+		if (mdsc->mdsmap->m_epoch == 0) {
+			dout("do_request no mdsmap, waiting for map\n");
+			list_add(&req->r_wait, &mdsc->waiting_for_map);
+			goto finish;
+		}
 		if (!(mdsc->fsc->mount_options->flags &
 		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
 		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {

From 84fcc2d2bd6cbf621e49e1d0f7eaef2e3c666b40 Mon Sep 17 00:00:00 2001
From: "Geng, Jichao" <geng.jichao@h3c.com>
Date: Thu, 5 Jan 2017 16:50:39 +0800
Subject: [PATCH 187/258] ceph: fix get_oldest_context()

For no snapshot case, we should use ci->truncate_{seq,size}.

Fixes: 5f743e456606 ("ceph: record truncate size/seq for snap data writeback")
Signed-off-by: Geng, Jichao <geng.jichao@h3c.com>
Signed-off-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/addr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9cd0c0ea7cdb..e4b066cd912a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -502,9 +502,9 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
 		dout(" head snapc %p has %d dirty pages\n",
 		     snapc, ci->i_wrbuffer_ref_head);
 		if (truncate_size)
-			*truncate_size = capsnap->truncate_size;
+			*truncate_size = ci->i_truncate_size;
 		if (truncate_seq)
-			*truncate_seq = capsnap->truncate_seq;
+			*truncate_seq = ci->i_truncate_seq;
 	}
 	spin_unlock(&ci->i_ceph_lock);
 	return snapc;

From 30f939feaeee23e21391cfc7b484f012eb189c3c Mon Sep 17 00:00:00 2001
From: Vlad Tsyrklevich <vlad@tsyrklevich.net>
Date: Mon, 9 Jan 2017 22:53:36 +0700
Subject: [PATCH 188/258] i2c: fix kernel memory disclosure in dev interface

i2c_smbus_xfer() does not always fill an entire block, allowing
kernel stack memory disclosure through the temp variable. Clear
it before it's read to.

Signed-off-by: Vlad Tsyrklevich <vlad@tsyrklevich.net>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@kernel.org
---
 drivers/i2c/i2c-dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 66f323fd3982..6f638bbc922d 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -331,7 +331,7 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client,
 		unsigned long arg)
 {
 	struct i2c_smbus_ioctl_data data_arg;
-	union i2c_smbus_data temp;
+	union i2c_smbus_data temp = {};
 	int datasize, res;
 
 	if (copy_from_user(&data_arg,

From 331c34255293cd02d395b7097008b509ba89e60e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 4 Jan 2017 20:57:22 -0800
Subject: [PATCH 189/258] i2c: do not enable fall back to Host Notify by
 default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Falling back unconditionally to HostNotify as primary client's interrupt
breaks some drivers which alter their functionality depending on whether
interrupt is present or not, so let's introduce a board flag telling I2C
core explicitly if we want wired interrupt or HostNotify-based one:
I2C_CLIENT_HOST_NOTIFY.

For DT-based systems we introduce "host-notify" property that we convert
to I2C_CLIENT_HOST_NOTIFY board flag.

Tested-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c.txt |  8 ++++++++
 drivers/i2c/i2c-core.c                        | 17 ++++++++---------
 include/linux/i2c.h                           |  1 +
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt
index 5fa691e6f638..cee9d5055fa2 100644
--- a/Documentation/devicetree/bindings/i2c/i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c.txt
@@ -62,6 +62,9 @@ wants to support one of the below features, it should adapt the bindings below.
 	"irq" and "wakeup" names are recognized by I2C core, other names are
 	left to individual drivers.
 
+- host-notify
+	device uses SMBus host notify protocol instead of interrupt line.
+
 - multi-master
 	states that there is another master active on this bus. The OS can use
 	this information to adapt power management to keep the arbitration awake
@@ -81,6 +84,11 @@ Binding may contain optional "interrupts" property, describing interrupts
 used by the device. I2C core will assign "irq" interrupt (or the very first
 interrupt if not using interrupt names) as primary interrupt for the slave.
 
+Alternatively, devices supporting SMbus Host Notify, and connected to
+adapters that support this feature, may use "host-notify" property. I2C
+core will create a virtual interrupt for Host Notify and assign it as
+primary interrupt for the slave.
+
 Also, if device is marked as a wakeup source, I2C core will set up "wakeup"
 interrupt for the device. If "wakeup" interrupt name is not present in the
 binding, then primary interrupt will be used as wakeup interrupt.
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index cf9e396d7702..7b117240f1ea 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -931,7 +931,10 @@ static int i2c_device_probe(struct device *dev)
 	if (!client->irq) {
 		int irq = -ENOENT;
 
-		if (dev->of_node) {
+		if (client->flags & I2C_CLIENT_HOST_NOTIFY) {
+			dev_dbg(dev, "Using Host Notify IRQ\n");
+			irq = i2c_smbus_host_notify_to_irq(client);
+		} else if (dev->of_node) {
 			irq = of_irq_get_byname(dev->of_node, "irq");
 			if (irq == -EINVAL || irq == -ENODATA)
 				irq = of_irq_get(dev->of_node, 0);
@@ -940,14 +943,7 @@ static int i2c_device_probe(struct device *dev)
 		}
 		if (irq == -EPROBE_DEFER)
 			return irq;
-		/*
-		 * ACPI and OF did not find any useful IRQ, try to see
-		 * if Host Notify can be used.
-		 */
-		if (irq < 0) {
-			dev_dbg(dev, "Using Host Notify IRQ\n");
-			irq = i2c_smbus_host_notify_to_irq(client);
-		}
+
 		if (irq < 0)
 			irq = 0;
 
@@ -1716,6 +1712,9 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 	info.of_node = of_node_get(node);
 	info.archdata = &dev_ad;
 
+	if (of_property_read_bool(node, "host-notify"))
+		info.flags |= I2C_CLIENT_HOST_NOTIFY;
+
 	if (of_get_property(node, "wakeup-source", NULL))
 		info.flags |= I2C_CLIENT_WAKE;
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b2109c522dec..4b45ec46161f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -665,6 +665,7 @@ i2c_unlock_adapter(struct i2c_adapter *adapter)
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
 					/* Must equal I2C_M_TEN below */
 #define I2C_CLIENT_SLAVE	0x20	/* we are the slave */
+#define I2C_CLIENT_HOST_NOTIFY	0x40	/* We want to use I2C host notify */
 #define I2C_CLIENT_WAKE		0x80	/* for board_info; true iff can wake */
 #define I2C_CLIENT_SCCB		0x9000	/* Use Omnivision SCCB protocol */
 					/* Must match I2C_M_STOP|IGNORE_NAK */

From 6f724fb3039522486fce2e32e4c0fbe238a6ab02 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 6 Jan 2017 19:02:57 +0800
Subject: [PATCH 190/258] i2c: print correct device invalid address

In of_i2c_register_device(), when the check for
device address validity fails we print the info.addr,
which has not been assigned properly.

Fix this by printing the actual invalid address.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: b4e2f6ac1281 ("i2c: apply DT flags when probing")
Cc: stable@kernel.org
---
 drivers/i2c/i2c-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 7b117240f1ea..c26296c2eac5 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1704,7 +1704,7 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 
 	if (i2c_check_addr_validity(addr, info.flags)) {
 		dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n",
-			info.addr, node->full_name);
+			addr, node->full_name);
 		return ERR_PTR(-EINVAL);
 	}
 

From 2659161dd40dbb599a19f320164373093df44a89 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 29 Dec 2016 22:27:33 +0000
Subject: [PATCH 191/258] i2c: fix spelling mistake: "insufficent" ->
 "insufficient"

Trivial fix to spelling mistake in WARN message, insufficient has
an insufficient number of i's in the spelling.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index c26296c2eac5..583e95042a21 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -3632,7 +3632,7 @@ int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
 	int ret;
 
 	if (!client || !slave_cb) {
-		WARN(1, "insufficent data\n");
+		WARN(1, "insufficient data\n");
 		return -EINVAL;
 	}
 

From 701dc207bf551d9fe6defa36e84a911e880398c3 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Wed, 11 Jan 2017 10:11:44 +0100
Subject: [PATCH 192/258] i2c: piix4: Avoid race conditions with IMC

On AMD's SB800 and upwards, the SMBus is shared with the Integrated
Micro Controller (IMC).

The platform provides a hardware semaphore to avoid race conditions
among them. (Check page 288 of the SB800-Series Southbridges Register
Reference Guide http://support.amd.com/TechDocs/45482.pdf)

Without this patch, many access to the SMBus end with an invalid
transaction or even with the bus stalled.

Reported-by: Alexandre Desnoyers <alex@qtec.com>
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>:
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-piix4.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index c2268cdf38e8..e34d82e79b98 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -585,10 +585,29 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 		 u8 command, int size, union i2c_smbus_data *data)
 {
 	struct i2c_piix4_adapdata *adapdata = i2c_get_adapdata(adap);
+	unsigned short piix4_smba = adapdata->smba;
+	int retries = MAX_TIMEOUT;
+	int smbslvcnt;
 	u8 smba_en_lo;
 	u8 port;
 	int retval;
 
+	/* Request the SMBUS semaphore, avoid conflicts with the IMC */
+	smbslvcnt  = inb_p(SMBSLVCNT);
+	do {
+		outb_p(smbslvcnt | 0x10, SMBSLVCNT);
+
+		/* Check the semaphore status */
+		smbslvcnt  = inb_p(SMBSLVCNT);
+		if (smbslvcnt & 0x10)
+			break;
+
+		usleep_range(1000, 2000);
+	} while (--retries);
+	/* SMBus is still owned by the IMC, we give up */
+	if (!retries)
+		return -EBUSY;
+
 	mutex_lock(&piix4_mutex_sb800);
 
 	outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX);
@@ -606,6 +625,9 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
 	mutex_unlock(&piix4_mutex_sb800);
 
+	/* Release the semaphore */
+	outb_p(smbslvcnt | 0x20, SMBSLVCNT);
+
 	return retval;
 }
 

From ea7a80858f57d8878b1499ea0f1b8a635cc48de7 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Wed, 11 Jan 2017 14:29:54 -0800
Subject: [PATCH 193/258] net: lwtunnel: Handle lwtunnel_fill_encap failure

Handle failure in lwtunnel_fill_encap adding attributes to skb.

Fixes: 571e722676fe ("ipv4: support for fib route lwtunnel encap attributes")
Fixes: 19e42e451506 ("ipv6: support for fib route lwtunnel encap attributes")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 11 +++++++----
 net/ipv6/route.c         |  3 ++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index eba1546b5031..9a375b908d01 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1279,8 +1279,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
 			goto nla_put_failure;
 #endif
-		if (fi->fib_nh->nh_lwtstate)
-			lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate);
+		if (fi->fib_nh->nh_lwtstate &&
+		    lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
+			goto nla_put_failure;
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (fi->fib_nhs > 1) {
@@ -1316,8 +1317,10 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
 				goto nla_put_failure;
 #endif
-			if (nh->nh_lwtstate)
-				lwtunnel_fill_encap(skb, nh->nh_lwtstate);
+			if (nh->nh_lwtstate &&
+			    lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
+				goto nla_put_failure;
+
 			/* length of rtnetlink header + attributes */
 			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
 		} endfor_nexthops(fi);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ce5aaf448c54..4f6b067c8753 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3317,7 +3317,8 @@ static int rt6_fill_node(struct net *net,
 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
 		goto nla_put_failure;
 
-	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
+	if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
+		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
 	return 0;

From 994c5483e7f6dbf9fea622ba2031b9d868feb4b9 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Wed, 11 Jan 2017 16:45:51 -0600
Subject: [PATCH 194/258] net: qcom/emac: grab a reference to the phydev on
 ACPI systems

Commit 6ffe1c4cd0a7 ("net: qcom/emac: fix of_node and phydev leaks")
fixed the problem with reference leaks on phydev, but the fix is
device-tree specific.  When the driver unloads, the reference is
dropped only on DT systems.

Instead, it's cleaner if up grab an reference on ACPI systems.
When the driver unloads, we can drop the reference without having
to check whether we're on a DT system.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/emac/emac-phy.c | 7 +++++++
 drivers/net/ethernet/qualcomm/emac/emac.c     | 6 ++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
index 99a14df28b96..2851b4c56570 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -201,6 +201,13 @@ int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt)
 		else
 			adpt->phydev = mdiobus_get_phy(mii_bus, phy_addr);
 
+		/* of_phy_find_device() claims a reference to the phydev,
+		 * so we do that here manually as well. When the driver
+		 * later unloads, it can unilaterally drop the reference
+		 * without worrying about ACPI vs DT.
+		 */
+		if (adpt->phydev)
+			get_device(&adpt->phydev->mdio.dev);
 	} else {
 		struct device_node *phy_np;
 
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 422289c232bc..f46d300bd585 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -719,8 +719,7 @@ static int emac_probe(struct platform_device *pdev)
 err_undo_napi:
 	netif_napi_del(&adpt->rx_q.napi);
 err_undo_mdiobus:
-	if (!has_acpi_companion(&pdev->dev))
-		put_device(&adpt->phydev->mdio.dev);
+	put_device(&adpt->phydev->mdio.dev);
 	mdiobus_unregister(adpt->mii_bus);
 err_undo_clocks:
 	emac_clks_teardown(adpt);
@@ -740,8 +739,7 @@ static int emac_remove(struct platform_device *pdev)
 
 	emac_clks_teardown(adpt);
 
-	if (!has_acpi_companion(&pdev->dev))
-		put_device(&adpt->phydev->mdio.dev);
+	put_device(&adpt->phydev->mdio.dev);
 	mdiobus_unregister(adpt->mii_bus);
 	free_netdev(netdev);
 

From 8a430ed50bb1b19ca14a46661f3b1b35f2fb5c39 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Wed, 11 Jan 2017 15:42:17 -0800
Subject: [PATCH 195/258] net: ipv4: fix table id in getroute response

rtm_table is an 8-bit field while table ids are allowed up to u32. Commit
709772e6e065 ("net: Fix routing tables with id > 255 for legacy software")
added the preference to set rtm_table in dumps to RT_TABLE_COMPAT if the
table id is > 255. The table id returned on get route requests should do
the same.

Fixes: c36ba6603a11 ("net: Allow user to get table id from route lookup")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0fcac8e7a2b2..709ffe67d1de 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2472,7 +2472,7 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= fl4->flowi4_tos;
-	r->rtm_table	= table_id;
+	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
 	if (nla_put_u32(skb, RTA_TABLE, table_id))
 		goto nla_put_failure;
 	r->rtm_type	= rt->rt_type;

From 2dfc61736482441993bfb7dfaa971113b53f107c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 11 Jan 2017 08:47:00 -0500
Subject: [PATCH 196/258] NFSv4: Call update_changeattr() from _nfs4_proc_open
 only if a file was created

We don't want to invalidate the directory attribute and data cache unless we
know that a file was created, or the change attribute differs from the one
in our cache.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 700ed1fc1075..4010c33151ad 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2390,11 +2390,12 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	nfs_fattr_map_and_free_names(server, &data->f_attr);
 
 	if (o_arg->open_flags & O_CREAT) {
-		update_changeattr(dir, &o_res->cinfo);
 		if (o_arg->open_flags & O_EXCL)
 			data->file_created = 1;
 		else if (o_res->cinfo.before != o_res->cinfo.after)
 			data->file_created = 1;
+		if (data->file_created || dir->i_version != o_res->cinfo.after)
+			update_changeattr(dir, &o_res->cinfo);
 	}
 	if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
 		server->caps &= ~NFS_CAP_POSIX_LOCK;

From c733c49c32624f927f443be6dbabb387006bbe42 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 11 Jan 2017 12:32:26 -0500
Subject: [PATCH 197/258] NFSv4: Don't apply change_info4 twice on rename
 within a directory

If a file is renamed, but stays in the same directory, we will still receive
2 change_info4 structures describing the change to that directory, but we
only want to apply it once.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4010c33151ad..1e797bf74aaf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4159,8 +4159,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
 	if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN)
 		return 0;
 
-	update_changeattr(old_dir, &res->old_cinfo);
-	update_changeattr(new_dir, &res->new_cinfo);
+	if (task->tk_status == 0) {
+		update_changeattr(old_dir, &res->old_cinfo);
+		if (new_dir != old_dir)
+			update_changeattr(new_dir, &res->new_cinfo);
+	}
 	return 1;
 }
 

From c40d52fe1c2ba25dcb8cd207c8d26ef5f57f0476 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 11 Jan 2017 12:36:11 -0500
Subject: [PATCH 198/258] NFSv4: Don't call update_changeattr() unless the
 unlink is successful

If the unlink wasn't successful, then the directory has presumably not
changed.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1e797bf74aaf..6a35204affa4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4125,7 +4125,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 	if (nfs4_async_handle_error(task, res->server, NULL,
 				    &data->timeout) == -EAGAIN)
 		return 0;
-	update_changeattr(dir, &res->cinfo);
+	if (task->tk_status == 0)
+		update_changeattr(dir, &res->cinfo);
 	return 1;
 }
 

From d3129ef672cac81c4d0185336af377c8dc1091d3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 11 Jan 2017 22:07:28 -0500
Subject: [PATCH 199/258] NFSv4: update_changeattr should update the attribute
 timestamp

Otherwise, the attribute cache remains marked as being expired.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6a35204affa4..ecc151697fd4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1082,7 +1082,8 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
 	return nfs4_call_sync_sequence(clnt, server, msg, args, res);
 }
 
-static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
+		unsigned long timestamp)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
 
@@ -1098,6 +1099,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 				NFS_INO_INVALID_ACL;
 	}
 	dir->i_version = cinfo->after;
+	nfsi->read_cache_jiffies = timestamp;
 	nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	nfs_fscache_invalidate(dir);
 	spin_unlock(&dir->i_lock);
@@ -2395,7 +2397,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		else if (o_res->cinfo.before != o_res->cinfo.after)
 			data->file_created = 1;
 		if (data->file_created || dir->i_version != o_res->cinfo.after)
-			update_changeattr(dir, &o_res->cinfo);
+			update_changeattr(dir, &o_res->cinfo,
+					o_res->f_attr->time_start);
 	}
 	if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
 		server->caps &= ~NFS_CAP_POSIX_LOCK;
@@ -4073,11 +4076,12 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name)
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
+	unsigned long timestamp = jiffies;
 	int status;
 
 	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
 	if (status == 0)
-		update_changeattr(dir, &res.cinfo);
+		update_changeattr(dir, &res.cinfo, timestamp);
 	return status;
 }
 
@@ -4126,7 +4130,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 				    &data->timeout) == -EAGAIN)
 		return 0;
 	if (task->tk_status == 0)
-		update_changeattr(dir, &res->cinfo);
+		update_changeattr(dir, &res->cinfo, res->dir_attr->time_start);
 	return 1;
 }
 
@@ -4161,9 +4165,9 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
 		return 0;
 
 	if (task->tk_status == 0) {
-		update_changeattr(old_dir, &res->old_cinfo);
+		update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start);
 		if (new_dir != old_dir)
-			update_changeattr(new_dir, &res->new_cinfo);
+			update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start);
 	}
 	return 1;
 }
@@ -4201,7 +4205,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
 
 	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (!status) {
-		update_changeattr(dir, &res.cinfo);
+		update_changeattr(dir, &res.cinfo, res.fattr->time_start);
 		status = nfs_post_op_update_inode(inode, res.fattr);
 		if (!status)
 			nfs_setsecurity(inode, res.fattr, res.label);
@@ -4276,7 +4280,8 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
 	int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
 				    &data->arg.seq_args, &data->res.seq_res, 1);
 	if (status == 0) {
-		update_changeattr(dir, &data->res.dir_cinfo);
+		update_changeattr(dir, &data->res.dir_cinfo,
+				data->res.fattr->time_start);
 		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
 	}
 	return status;

From dcd208697707b12adeaa45643bab239c5e90ef9b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 11 Jan 2017 20:34:50 -0500
Subject: [PATCH 200/258] nfsd: fix supported attributes for acl & labels

Oops--in 916d2d844afd I moved some constants into an array for
convenience, but here I'm accidentally writing to that array.

The effect is that if you ever encounter a filesystem lacking support
for ACLs or security labels, then all queries of supported attributes
will report that attribute as unsupported from then on.

Fixes: 916d2d844afd "nfsd: clean up supported attribute handling"
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 7ecf16be4a44..8fae53ce21d1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2440,7 +2440,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 	p++;                /* to be backfilled later */
 
 	if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
-		u32 *supp = nfsd_suppattrs[minorversion];
+		u32 supp[3];
+
+		memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp));
 
 		if (!IS_POSIXACL(dentry->d_inode))
 			supp[0] &= ~FATTR4_WORD0_ACL;

From 78794d1890708cf94e3961261e52dcec2cc34722 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 9 Jan 2017 17:15:18 -0500
Subject: [PATCH 201/258] svcrpc: don't leak contexts on PROC_DESTROY

Context expiry times are in units of seconds since boot, not unix time.

The use of get_seconds() here therefore sets the expiry time decades in
the future.  This prevents timely freeing of contexts destroyed by
client RPC_GSS_PROC_DESTROY requests.  We'd still free them eventually
(when the module is unloaded or the container shut down), but a lot of
contexts could pile up before then.

Cc: stable@vger.kernel.org
Fixes: c5b29f885afe "sunrpc: use seconds since boot in expiry cache"
Reported-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 886e9d381771..153082598522 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1489,7 +1489,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 	case RPC_GSS_PROC_DESTROY:
 		if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
 			goto auth_err;
-		rsci->h.expiry_time = get_seconds();
+		rsci->h.expiry_time = seconds_since_boot();
 		set_bit(CACHE_NEGATIVE, &rsci->h.flags);
 		if (resv->iov_len + 4 > PAGE_SIZE)
 			goto drop;

From 546125d1614264d26080817d0c8cddb9b25081fa Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Thu, 5 Jan 2017 16:34:51 -0500
Subject: [PATCH 202/258] sunrpc: don't call sleeping functions from the
 notifier block callbacks

The inet6addr_chain is an atomic notifier chain, so we can't call
anything that might sleep (like lock_sock)... instead of closing the
socket from svc_age_temp_xprts_now (which is called by the notifier
function), just have the rpc service threads do it instead.

Cc: stable@vger.kernel.org
Fixes: c3d4879e01be "sunrpc: Add a function to close..."
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h |  1 +
 net/sunrpc/svc_xprt.c           | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index e5d193440374..7440290f64ac 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -66,6 +66,7 @@ struct svc_xprt {
 #define XPT_LISTENER	10		/* listening endpoint */
 #define XPT_CACHE_AUTH	11		/* cache auth info */
 #define XPT_LOCAL	12		/* connection from loopback interface */
+#define XPT_KILL_TEMP   13		/* call xpo_kill_temp_xprt before closing */
 
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 3bc1d61694cb..9c9db55a0c1e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -799,6 +799,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 
 	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
 		dprintk("svc_recv: found XPT_CLOSE\n");
+		if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
+			xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
 		svc_delete_xprt(xprt);
 		/* Leave XPT_BUSY set on the dead xprt: */
 		goto out;
@@ -1020,9 +1022,11 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
 		le = to_be_closed.next;
 		list_del_init(le);
 		xprt = list_entry(le, struct svc_xprt, xpt_list);
-		dprintk("svc_age_temp_xprts_now: closing %p\n", xprt);
-		xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
-		svc_close_xprt(xprt);
+		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		set_bit(XPT_KILL_TEMP, &xprt->xpt_flags);
+		dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n",
+				xprt);
+		svc_xprt_enqueue(xprt);
 	}
 }
 EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now);

From 05a974efa4bdf6e2a150e3f27dc6fcf0a9ad5655 Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@osg.samsung.com>
Date: Thu, 15 Dec 2016 18:40:14 +0100
Subject: [PATCH 203/258] ieee802154: atusb: do not use the stack for buffers
 to make them DMA able

From 4.9 we should really avoid using the stack here as this will not be DMA
able on various platforms. This changes the buffers already being present in
time of 4.9 being released. This should go into stable as well.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/net/ieee802154/atusb.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index 1253f864737a..fa3e8c34b26c 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -117,13 +117,26 @@ static int atusb_read_reg(struct atusb *atusb, uint8_t reg)
 {
 	struct usb_device *usb_dev = atusb->usb_dev;
 	int ret;
+	uint8_t *buffer;
 	uint8_t value;
 
+	buffer = kmalloc(1, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
 	dev_dbg(&usb_dev->dev, "atusb: reg = 0x%x\n", reg);
 	ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
 				ATUSB_REG_READ, ATUSB_REQ_FROM_DEV,
-				0, reg, &value, 1, 1000);
-	return ret >= 0 ? value : ret;
+				0, reg, buffer, 1, 1000);
+
+	if (ret >= 0) {
+		value = buffer[0];
+		kfree(buffer);
+		return value;
+	} else {
+		kfree(buffer);
+		return ret;
+	}
 }
 
 static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask,
@@ -608,9 +621,13 @@ static const struct ieee802154_ops atusb_ops = {
 static int atusb_get_and_show_revision(struct atusb *atusb)
 {
 	struct usb_device *usb_dev = atusb->usb_dev;
-	unsigned char buffer[3];
+	unsigned char *buffer;
 	int ret;
 
+	buffer = kmalloc(3, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
 	/* Get a couple of the ATMega Firmware values */
 	ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
 				ATUSB_ID, ATUSB_REQ_FROM_DEV, 0, 0,
@@ -631,15 +648,20 @@ static int atusb_get_and_show_revision(struct atusb *atusb)
 		dev_info(&usb_dev->dev, "Please update to version 0.2 or newer");
 	}
 
+	kfree(buffer);
 	return ret;
 }
 
 static int atusb_get_and_show_build(struct atusb *atusb)
 {
 	struct usb_device *usb_dev = atusb->usb_dev;
-	char build[ATUSB_BUILD_SIZE + 1];
+	char *build;
 	int ret;
 
+	build = kmalloc(ATUSB_BUILD_SIZE + 1, GFP_KERNEL);
+	if (!build)
+		return -ENOMEM;
+
 	ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
 				ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0,
 				build, ATUSB_BUILD_SIZE, 1000);
@@ -648,6 +670,7 @@ static int atusb_get_and_show_build(struct atusb *atusb)
 		dev_info(&usb_dev->dev, "Firmware: build %s\n", build);
 	}
 
+	kfree(build);
 	return ret;
 }
 

From 2fd2b550a5ed13b1d6640ff77630fc369636a544 Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@osg.samsung.com>
Date: Thu, 15 Dec 2016 18:40:15 +0100
Subject: [PATCH 204/258] ieee802154: atusb: make sure we set a randaom
 extended address if fetching fails

In the unlikely case were the firmware is new enough but the actual USB command
still fails make sure we set a random address and return.

Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/net/ieee802154/atusb.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index fa3e8c34b26c..67790f88908d 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -737,8 +737,11 @@ static int atusb_set_extended_addr(struct atusb *atusb)
 	ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
 				ATUSB_EUI64_READ, ATUSB_REQ_FROM_DEV, 0, 0,
 				buffer, IEEE802154_EXTENDED_ADDR_LEN, 1000);
-	if (ret < 0)
-		dev_err(&usb_dev->dev, "failed to fetch extended address\n");
+	if (ret < 0) {
+		dev_err(&usb_dev->dev, "failed to fetch extended address, random address set\n");
+		ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr);
+		return ret;
+	}
 
 	memcpy(&extended_addr, buffer, IEEE802154_EXTENDED_ADDR_LEN);
 	/* Check if read address is not empty and the unicast bit is set correctly */

From 5eb35a6ccea61648a55713c076ab65423eea1ac0 Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@osg.samsung.com>
Date: Thu, 15 Dec 2016 18:40:16 +0100
Subject: [PATCH 205/258] ieee802154: atusb: do not use the stack for address
 fetching to make it DMA able

From 4.9 we should really avoid using the stack here as this will not be DMA
able on various platforms. This changes a buffer that was introduced in the
4.10 merge window.

Fixes: 6cc33eba232c ("ieee802154: atusb: try to read permanent extended
address from device")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/net/ieee802154/atusb.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index 67790f88908d..63cb67917a07 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -721,7 +721,7 @@ fail:
 static int atusb_set_extended_addr(struct atusb *atusb)
 {
 	struct usb_device *usb_dev = atusb->usb_dev;
-	unsigned char buffer[IEEE802154_EXTENDED_ADDR_LEN];
+	unsigned char *buffer;
 	__le64 extended_addr;
 	u64 addr;
 	int ret;
@@ -733,6 +733,10 @@ static int atusb_set_extended_addr(struct atusb *atusb)
 		return 0;
 	}
 
+	buffer = kmalloc(IEEE802154_EXTENDED_ADDR_LEN, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
 	/* Firmware is new enough so we fetch the address from EEPROM */
 	ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
 				ATUSB_EUI64_READ, ATUSB_REQ_FROM_DEV, 0, 0,
@@ -740,6 +744,7 @@ static int atusb_set_extended_addr(struct atusb *atusb)
 	if (ret < 0) {
 		dev_err(&usb_dev->dev, "failed to fetch extended address, random address set\n");
 		ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr);
+		kfree(buffer);
 		return ret;
 	}
 
@@ -755,6 +760,7 @@ static int atusb_set_extended_addr(struct atusb *atusb)
 			&addr);
 	}
 
+	kfree(buffer);
 	return ret;
 }
 

From f301606934b240fb54d8edf3618a0483e36046fc Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sun, 18 Dec 2016 15:25:33 -0800
Subject: [PATCH 206/258] at86rf230: Allow slow GPIO pins for "rstn"

Driver code never touches "rstn" signal in atomic context, so there's
no need to implicitly put such restriction on it by using gpio_set_value
to manipulate it. Replace gpio_set_value to gpio_set_value_cansleep to
fix that.

As a an example of where such restriction might be inconvenient,
consider a hardware design where "rstn" is connected to a pin of I2C/SPI
GPIO expander chip.

Cc: Chris Healy <cphealy@gmail.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/net/ieee802154/at86rf230.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 46d53a6c8cf8..76ba7ecfe142 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -1715,9 +1715,9 @@ static int at86rf230_probe(struct spi_device *spi)
 	/* Reset */
 	if (gpio_is_valid(rstn)) {
 		udelay(1);
-		gpio_set_value(rstn, 0);
+		gpio_set_value_cansleep(rstn, 0);
 		udelay(1);
-		gpio_set_value(rstn, 1);
+		gpio_set_value_cansleep(rstn, 1);
 		usleep_range(120, 240);
 	}
 

From 8e38b7d4d71479b23b77f01cf0e5071610b8f357 Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@osg.samsung.com>
Date: Mon, 2 Jan 2017 16:58:13 +0100
Subject: [PATCH 207/258] ieee802154: atusb: fix driver to work with older
 firmware versions

After the addition of the frame_retries callback we could run into cases where
a ATUSB device with an older firmware version would now longer be able to bring
the interface up.

We keep this functionality disabled now if the minimum firmware version for this
feature is not available.

Fixes: 5d82288b93db3bc ("ieee802154: atusb: implement .set_frame_retries
ops callback")
Reported-by: Alexander Aring <aar@pengutronix.de>
Acked-by: Alexander Aring <aar@pengutronix.de>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/net/ieee802154/atusb.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index 63cb67917a07..ef688518ad77 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -562,13 +562,6 @@ static int
 atusb_set_frame_retries(struct ieee802154_hw *hw, s8 retries)
 {
 	struct atusb *atusb = hw->priv;
-	struct device *dev = &atusb->usb_dev->dev;
-
-	if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 3) {
-		dev_info(dev, "Automatic frame retransmission is only available from "
-			"firmware version 0.3. Please update if you want this feature.");
-		return -EINVAL;
-	}
 
 	return atusb_write_subreg(atusb, SR_MAX_FRAME_RETRIES, retries);
 }
@@ -802,8 +795,7 @@ static int atusb_probe(struct usb_interface *interface,
 
 	hw->parent = &usb_dev->dev;
 	hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT |
-		    IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS |
-		    IEEE802154_HW_FRAME_RETRIES;
+		    IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS;
 
 	hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
 			 WPAN_PHY_FLAG_CCA_MODE;
@@ -832,6 +824,9 @@ static int atusb_probe(struct usb_interface *interface,
 	atusb_get_and_show_build(atusb);
 	atusb_set_extended_addr(atusb);
 
+	if (atusb->fw_ver_maj >= 0 && atusb->fw_ver_min >= 3)
+		hw->flags |= IEEE802154_HW_FRAME_RETRIES;
+
 	ret = atusb_get_and_clear_error(atusb);
 	if (ret) {
 		dev_err(&atusb->usb_dev->dev,

From ce1ca7d2d140a1f4aaffd297ac487f246963dd2f Mon Sep 17 00:00:00 2001
From: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Date: Mon, 9 Jan 2017 16:00:44 +0530
Subject: [PATCH 208/258] svcrdma: avoid duplicate dma unmapping during error
 recovery

In rdma_read_chunk_frmr() when ib_post_send() fails, the error code path
invokes ib_dma_unmap_sg() to unmap the sg list. It then invokes
svc_rdma_put_frmr() which in turn tries to unmap the same sg list through
ib_dma_unmap_sg() again. This second unmap is invalid and could lead to
problems when the iova being unmapped is subsequently reused. Remove
the call to unmap in rdma_read_chunk_frmr() and let svc_rdma_put_frmr()
handle it.

Fixes: 412a15c0fe53 ("svcrdma: Port to new memory registration API")
Cc: stable@vger.kernel.org
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 57d35fbb1c28..172b537f8cfc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -347,8 +347,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 	atomic_inc(&rdma_stat_read);
 	return ret;
  err:
-	ib_dma_unmap_sg(xprt->sc_cm_id->device,
-			frmr->sg, frmr->sg_nents, frmr->direction);
 	svc_rdma_put_context(ctxt, 0);
 	svc_rdma_put_frmr(xprt, frmr);
 	return ret;

From 7aa4865506a26c607e00bd9794a85785b55ebca7 Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@caviumnetworks.com>
Date: Thu, 12 Jan 2017 07:28:06 -0800
Subject: [PATCH 209/258] net: thunderx: acpi: fix LMAC initialization

While probing BGX we requesting appropriate QLM for it's configuration
and get LMAC count by that request. Then, while reading configured
MAC values from SSDT table we need to save them in proper mapping:
  BGX[i]->lmac[j].mac = <MAC value>
to later provide for initialization stuff. In order to fill
such mapping properly we need to add lmac index to be used while
acpi initialization since at this moment bgx->lmac_count already contains
actual value.

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@caviumnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 9211c750e064..2f85b64f01fa 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -47,8 +47,9 @@ struct lmac {
 struct bgx {
 	u8			bgx_id;
 	struct	lmac		lmac[MAX_LMAC_PER_BGX];
-	int			lmac_count;
+	u8			lmac_count;
 	u8			max_lmac;
+	u8                      acpi_lmac_idx;
 	void __iomem		*reg_base;
 	struct pci_dev		*pdev;
 	bool                    is_dlm;
@@ -1143,13 +1144,13 @@ static acpi_status bgx_acpi_register_phy(acpi_handle handle,
 	if (acpi_bus_get_device(handle, &adev))
 		goto out;
 
-	acpi_get_mac_address(dev, adev, bgx->lmac[bgx->lmac_count].mac);
+	acpi_get_mac_address(dev, adev, bgx->lmac[bgx->acpi_lmac_idx].mac);
 
-	SET_NETDEV_DEV(&bgx->lmac[bgx->lmac_count].netdev, dev);
+	SET_NETDEV_DEV(&bgx->lmac[bgx->acpi_lmac_idx].netdev, dev);
 
-	bgx->lmac[bgx->lmac_count].lmacid = bgx->lmac_count;
+	bgx->lmac[bgx->acpi_lmac_idx].lmacid = bgx->acpi_lmac_idx;
+	bgx->acpi_lmac_idx++; /* move to next LMAC */
 out:
-	bgx->lmac_count++;
 	return AE_OK;
 }
 

From d1b333d12cde9cabe898160b6be9769d3382d81c Mon Sep 17 00:00:00 2001
From: Jike Song <jike.song@intel.com>
Date: Thu, 12 Jan 2017 16:52:03 +0800
Subject: [PATCH 210/258] vfio iommu type1: fix the testing of capability for
 remote task

Before the mdev enhancement type1 iommu used capable() to test the
capability of current task; in the course of mdev development a
new requirement, testing for another task other than current, was
raised.  ns_capable() was used for this purpose, however it still
tests current, the only difference is, in a specified namespace.

Fix it by using has_capability() instead, which tests the cap for
specified task in init_user_ns, the same namespace as capable().

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Jike Song <jike.song@intel.com>
Reviewed-by: James Morris <james.l.morris@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9266271a787a..77373e51b283 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -495,8 +495,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 				  unsigned long *pfn_base, bool do_accounting)
 {
 	unsigned long limit;
-	bool lock_cap = ns_capable(task_active_pid_ns(dma->task)->user_ns,
-				   CAP_IPC_LOCK);
+	bool lock_cap = has_capability(dma->task, CAP_IPC_LOCK);
 	struct mm_struct *mm;
 	int ret;
 	bool rsvd;

From 3139dc8ded6f27552a248d23fe9f086e3027fa12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 11 Jan 2017 15:39:31 +0100
Subject: [PATCH 211/258] dmaengine: rcar-dmac: unmap slave resource when
 channel is freed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The slave mapping should be removed together with other channel
resources when the channel is freed. If it's not unmapped it will hang
around forever after the channel is freed.

Fixes: 9f878603dbdb7db3 ("dmaengine: rcar-dmac: add iommu support for slave transfers")
Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/sh/rcar-dmac.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 2e441d0ccd79..4c357d475465 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -986,6 +986,7 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
 {
 	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
 	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	struct rcar_dmac_chan_map *map = &rchan->map;
 	struct rcar_dmac_desc_page *page, *_page;
 	struct rcar_dmac_desc *desc;
 	LIST_HEAD(list);
@@ -1019,6 +1020,13 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
 		free_page((unsigned long)page);
 	}
 
+	/* Remove slave mapping if present. */
+	if (map->slave.xfer_size) {
+		dma_unmap_resource(chan->device->dev, map->addr,
+				   map->slave.xfer_size, map->dir, 0);
+		map->slave.xfer_size = 0;
+	}
+
 	pm_runtime_put(chan->device->dev);
 }
 

From 43071d8fb3b7f589d72663c496a6880fb097533c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 13 Jan 2017 11:28:25 +0100
Subject: [PATCH 212/258] mac80211: initialize SMPS field in HT capabilities

ibss and mesh modes copy the ht capabilites from the band without
overriding the SMPS state. Unfortunately the default value 0 for the
SMPS field means static SMPS instead of disabled.

This results in HT ibss and mesh setups using only single-stream rates,
even though SMPS is not supposed to be active.

Initialize SMPS to disabled for all bands on ieee80211_hw_register to
ensure that the value is sane where it is not overriden with the real
SMPS state.

Reported-by: Elektra Wagenrad <onelektra@gmx.net>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
[move VHT TODO comment to a better place]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1822c77f2b1c..56fb47953b72 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -913,12 +913,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		supp_ht = supp_ht || sband->ht_cap.ht_supported;
 		supp_vht = supp_vht || sband->vht_cap.vht_supported;
 
-		if (sband->ht_cap.ht_supported)
-			local->rx_chains =
-				max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs),
-				    local->rx_chains);
+		if (!sband->ht_cap.ht_supported)
+			continue;
 
 		/* TODO: consider VHT for RX chains, hopefully it's the same */
+		local->rx_chains =
+			max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs),
+			    local->rx_chains);
+
+		/* no need to mask, SM_PS_DISABLED has all bits set */
+		sband->ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
+			             IEEE80211_HT_CAP_SM_PS_SHIFT;
 	}
 
 	/* if low-level driver supports AP, we also support VLAN */

From dbef53621116474bb883f76f0ba6b7640bc42332 Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Fri, 13 Jan 2017 13:32:51 +0100
Subject: [PATCH 213/258] mac80211: prevent skb/txq mismatch

Station structure is considered as not uploaded
(to driver) until drv_sta_state() finishes. This
call is however done after the structure is
attached to mac80211 internal lists and hashes.
This means mac80211 can lookup (and use) station
structure before it is uploaded to a driver.

If this happens (structure exists, but
sta->uploaded is false) fast_tx path can still be
taken. Deep in the fastpath call the sta->uploaded
is checked against to derive "pubsta" argument for
ieee80211_get_txq(). If sta->uploaded is false
(and sta is actually non-NULL) ieee80211_get_txq()
effectively downgraded to vif->txq.

At first glance this may look innocent but coerces
mac80211 into a state that is almost guaranteed
(codel may drop offending skb) to crash because a
station-oriented skb gets queued up on
vif-oriented txq. The ieee80211_tx_dequeue() ends
up looking at info->control.flags and tries to use
txq->sta which in the fail case is NULL.

It's probably pointless to pretend one can
downgrade skb from sta-txq to vif-txq.

Since downgrading unicast traffic to vif->txq must
not be done there's no txq to put a frame on if
sta->uploaded is false. Therefore the code is made
to fall back to regular tx() op path if the
described condition is hit.

Only drivers using wake_tx_queue were affected.

Example crash dump before fix:

 Unable to handle kernel paging request at virtual address ffffe26c
 PC is at ieee80211_tx_dequeue+0x204/0x690 [mac80211]
 [<bf4252a4>] (ieee80211_tx_dequeue [mac80211]) from
 [<bf4b1388>] (ath10k_mac_tx_push_txq+0x54/0x1c0 [ath10k_core])
 [<bf4b1388>] (ath10k_mac_tx_push_txq [ath10k_core]) from
 [<bf4bdfbc>] (ath10k_htt_txrx_compl_task+0xd78/0x11d0 [ath10k_core])
 [<bf4bdfbc>] (ath10k_htt_txrx_compl_task [ath10k_core])
 [<bf51c5a4>] (ath10k_pci_napi_poll+0x54/0xe8 [ath10k_pci])
 [<bf51c5a4>] (ath10k_pci_napi_poll [ath10k_pci]) from
 [<c0572e90>] (net_rx_action+0xac/0x160)

Reported-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0d8b716e509e..797e847cbc49 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1243,7 +1243,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 
 static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 					  struct ieee80211_vif *vif,
-					  struct ieee80211_sta *pubsta,
+					  struct sta_info *sta,
 					  struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -1257,10 +1257,13 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 	if (!ieee80211_is_data(hdr->frame_control))
 		return NULL;
 
-	if (pubsta) {
+	if (sta) {
 		u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
 
-		txq = pubsta->txq[tid];
+		if (!sta->uploaded)
+			return NULL;
+
+		txq = sta->sta.txq[tid];
 	} else if (vif) {
 		txq = vif->txq;
 	}
@@ -1503,23 +1506,17 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
 	struct fq *fq = &local->fq;
 	struct ieee80211_vif *vif;
 	struct txq_info *txqi;
-	struct ieee80211_sta *pubsta;
 
 	if (!local->ops->wake_tx_queue ||
 	    sdata->vif.type == NL80211_IFTYPE_MONITOR)
 		return false;
 
-	if (sta && sta->uploaded)
-		pubsta = &sta->sta;
-	else
-		pubsta = NULL;
-
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		sdata = container_of(sdata->bss,
 				     struct ieee80211_sub_if_data, u.ap);
 
 	vif = &sdata->vif;
-	txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+	txqi = ieee80211_get_txq(local, vif, sta, skb);
 
 	if (!txqi)
 		return false;

From 94a6fa899d2cb5ee76933406df32996576a562e4 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 12 Jan 2017 08:24:16 -0700
Subject: [PATCH 214/258] vfio/type1: Remove pid_namespace.h include

Using has_capability() rather than ns_capable(), we're no longer using
this header.

Cc: Jike Song <jike.song@intel.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 77373e51b283..b3cc33fa6d26 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -36,7 +36,6 @@
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <linux/workqueue.h>
-#include <linux/pid_namespace.h>
 #include <linux/mdev.h>
 #include <linux/notifier.h>
 

From 148d3d021cf9724fcf189ce4e525a094bbf5ce89 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 12 Jan 2017 12:09:09 -0800
Subject: [PATCH 215/258] net: systemport: Decouple flow control from
 __bcm_sysport_tx_reclaim

The __bcm_sysport_tx_reclaim() function is used to reclaim transmit
resources in different places within the driver. Most of them should
not affect the state of the transit flow control.

Introduce bcm_sysport_tx_clean() which cleans the ring, but does not
re-enable flow control towards the networking stack, and make
bcm_sysport_tx_reclaim() do the actual transmit queue flow control.

Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 25 ++++++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 7e8cf213fd81..744ed6ddaf37 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -710,11 +710,8 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 	unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
 	struct bcm_sysport_cb *cb;
-	struct netdev_queue *txq;
 	u32 hw_ind;
 
-	txq = netdev_get_tx_queue(ndev, ring->index);
-
 	/* Compute how many descriptors have been processed since last call */
 	hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index));
 	c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK;
@@ -745,9 +742,6 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 
 	ring->c_index = c_index;
 
-	if (netif_tx_queue_stopped(txq) && pkts_compl)
-		netif_tx_wake_queue(txq);
-
 	netif_dbg(priv, tx_done, ndev,
 		  "ring=%d c_index=%d pkts_compl=%d, bytes_compl=%d\n",
 		  ring->index, ring->c_index, pkts_compl, bytes_compl);
@@ -759,16 +753,33 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 static unsigned int bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 					   struct bcm_sysport_tx_ring *ring)
 {
+	struct netdev_queue *txq;
 	unsigned int released;
 	unsigned long flags;
 
+	txq = netdev_get_tx_queue(priv->netdev, ring->index);
+
 	spin_lock_irqsave(&ring->lock, flags);
 	released = __bcm_sysport_tx_reclaim(priv, ring);
+	if (released)
+		netif_tx_wake_queue(txq);
+
 	spin_unlock_irqrestore(&ring->lock, flags);
 
 	return released;
 }
 
+/* Locked version of the per-ring TX reclaim, but does not wake the queue */
+static void bcm_sysport_tx_clean(struct bcm_sysport_priv *priv,
+				 struct bcm_sysport_tx_ring *ring)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ring->lock, flags);
+	__bcm_sysport_tx_reclaim(priv, ring);
+	spin_unlock_irqrestore(&ring->lock, flags);
+}
+
 static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
 {
 	struct bcm_sysport_tx_ring *ring =
@@ -1252,7 +1263,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv,
 	napi_disable(&ring->napi);
 	netif_napi_del(&ring->napi);
 
-	bcm_sysport_tx_reclaim(priv, ring);
+	bcm_sysport_tx_clean(priv, ring);
 
 	kfree(ring->cbs);
 	ring->cbs = NULL;

From fa79581ea66ca43d56ef065346ac5be767fcb418 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Thu, 12 Jan 2017 21:30:01 +0100
Subject: [PATCH 216/258] ipv6: sr: fix several BUGs when preemption is enabled

When CONFIG_PREEMPT=y, CONFIG_IPV6=m and CONFIG_SEG6_HMAC=y,
seg6_hmac_init() is called during the initialization of the ipv6 module.
This causes a subsequent call to smp_processor_id() with preemption
enabled, resulting in the following trace.

[   20.451460] BUG: using smp_processor_id() in preemptible [00000000] code: systemd/1
[   20.452556] caller is debug_smp_processor_id+0x17/0x19
[   20.453304] CPU: 0 PID: 1 Comm: systemd Not tainted 4.9.0-rc5-00973-g46738b1 #1
[   20.454406]  ffffc9000062fc18 ffffffff813607b2 0000000000000000 ffffffff81a7f782
[   20.455528]  ffffc9000062fc48 ffffffff813778dc 0000000000000000 00000000001dcf98
[   20.456539]  ffffffffa003bd08 ffffffff81af93e0 ffffc9000062fc58 ffffffff81377905
[   20.456539] Call Trace:
[   20.456539]  [<ffffffff813607b2>] dump_stack+0x63/0x7f
[   20.456539]  [<ffffffff813778dc>] check_preemption_disabled+0xd1/0xe3
[   20.456539]  [<ffffffff81377905>] debug_smp_processor_id+0x17/0x19
[   20.460260]  [<ffffffffa0061f3b>] seg6_hmac_init+0xfa/0x192 [ipv6]
[   20.460260]  [<ffffffffa0061ccc>] seg6_init+0x39/0x6f [ipv6]
[   20.460260]  [<ffffffffa006121a>] inet6_init+0x21a/0x321 [ipv6]
[   20.460260]  [<ffffffffa0061000>] ? 0xffffffffa0061000
[   20.460260]  [<ffffffff81000457>] do_one_initcall+0x8b/0x115
[   20.460260]  [<ffffffff811328a3>] do_init_module+0x53/0x1c4
[   20.460260]  [<ffffffff8110650a>] load_module+0x1153/0x14ec
[   20.460260]  [<ffffffff81106a7b>] SYSC_finit_module+0x8c/0xb9
[   20.460260]  [<ffffffff81106a7b>] ? SYSC_finit_module+0x8c/0xb9
[   20.460260]  [<ffffffff81106abc>] SyS_finit_module+0x9/0xb
[   20.460260]  [<ffffffff810014d1>] do_syscall_64+0x62/0x75
[   20.460260]  [<ffffffff816834f0>] entry_SYSCALL64_slow_path+0x25/0x25

Moreover, dst_cache_* functions also call smp_processor_id(), generating
a similar trace.

This patch uses raw_cpu_ptr() in seg6_hmac_init() rather than this_cpu_ptr()
and disable preemption when using dst_cache_* functions.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_hmac.c     | 2 +-
 net/ipv6/seg6_iptunnel.c | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index ef1c8a46e7ac..03a064803626 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -400,7 +400,7 @@ static int seg6_hmac_init_algo(void)
 			*p_tfm = tfm;
 		}
 
-		p_tfm = this_cpu_ptr(algo->tfms);
+		p_tfm = raw_cpu_ptr(algo->tfms);
 		tfm = *p_tfm;
 
 		shsize = sizeof(*shash) + crypto_shash_descsize(tfm);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index bbfca22c34ae..1d60cb132835 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -265,7 +265,9 @@ int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 
 #ifdef CONFIG_DST_CACHE
+	preempt_disable();
 	dst = dst_cache_get(&slwt->cache);
+	preempt_enable();
 #endif
 
 	if (unlikely(!dst)) {
@@ -286,7 +288,9 @@ int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		}
 
 #ifdef CONFIG_DST_CACHE
+		preempt_disable();
 		dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+		preempt_enable();
 #endif
 	}
 

From 003c941057eaa868ca6fedd29a274c863167230d Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Thu, 12 Jan 2017 14:24:58 -0800
Subject: [PATCH 217/258] tcp: fix tcp_fastopen unaligned access complaints on
 sparc

Fix up a data alignment issue on sparc by swapping the order
of the cookie byte array field with the length field in
struct tcp_fastopen_cookie, and making it a proper union
to clean up the typecasting.

This addresses log complaints like these:
    log_unaligned: 113 callbacks suppressed
    Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360
    Kernel unaligned access at TPC[9764ac] tcp_try_fastopen+0x2ec/0x360
    Kernel unaligned access at TPC[9764c8] tcp_try_fastopen+0x308/0x360
    Kernel unaligned access at TPC[9764e4] tcp_try_fastopen+0x324/0x360
    Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h     | 7 ++++++-
 net/ipv4/tcp_fastopen.c | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fc5848dad7a4..c93f4b3a59cb 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 
 /* TCP Fast Open Cookie as stored in memory */
 struct tcp_fastopen_cookie {
+	union {
+		u8	val[TCP_FASTOPEN_COOKIE_MAX];
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr addr;
+#endif
+	};
 	s8	len;
-	u8	val[TCP_FASTOPEN_COOKIE_MAX];
 	bool	exp;	/* In RFC6994 experimental option format */
 };
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4e777a3243f9..f51919535ca7 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
 		struct tcp_fastopen_cookie tmp;
 
 		if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
-			struct in6_addr *buf = (struct in6_addr *) tmp.val;
+			struct in6_addr *buf = &tmp.addr;
 			int i;
 
 			for (i = 0; i < 4; i++)

From c6180a6237174f481dc856ed6e890d8196b6f0fb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 13 Jan 2017 13:31:32 -0500
Subject: [PATCH 218/258] NFSv4: Fix client recovery when server reboots
 multiple times

If the server reboots multiple times, the client should rely on the
server to tell it that it cannot reclaim state as per section 9.6.3.4
in RFC7530 and section 8.4.2.1 in RFC5661.
Currently, the client is being to conservative, and is assuming that
if the server reboots while state recovery is in progress, then it must
ignore state that was not recovered before the reboot.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4state.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1d152f4470cd..90e6193ce6be 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1729,7 +1729,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 			break;
 		case -NFS4ERR_STALE_CLIENTID:
 			set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
-			nfs4_state_clear_reclaim_reboot(clp);
 			nfs4_state_start_reclaim_reboot(clp);
 			break;
 		case -NFS4ERR_EXPIRED:

From 2e3258ecfaebace1ceffaa14e0ea94775d54f46f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:10 +0100
Subject: [PATCH 219/258] block: add blk_rq_payload_bytes

Add a helper to calculate the actual data transfer size for special
payload requests.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ff3d774f2751..1ca8e8fd1078 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1000,6 +1000,19 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
+/*
+ * Some commands like WRITE SAME have a payload or data transfer size which
+ * is different from the size of the request.  Any driver that supports such
+ * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
+ * calculate the data transfer size.
+ */
+static inline unsigned int blk_rq_payload_bytes(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return rq->special_vec.bv_len;
+	return blk_rq_bytes(rq);
+}
+
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 						     int op)
 {

From fd102b125e174edbea34e6e7a2d371bc7901c53d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:11 +0100
Subject: [PATCH 220/258] scsi: use blk_rq_payload_bytes

Without that we'll pass a wrong payload size in cmd->sdb, which
can lead to hangs with drivers that need the total transfer size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Chris Valean <v-chvale@microsoft.com>
Reported-by: Dexuan Cui <decui@microsoft.com>
Fixes: f9d03f96 ("block: improve handling of the magic discard payload")
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/scsi/scsi_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c35b6de4ca64..ad4ff8fcd4dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1018,7 +1018,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
 	count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
 	BUG_ON(count > sdb->table.nents);
 	sdb->table.nents = count;
-	sdb->length = blk_rq_bytes(req);
+	sdb->length = blk_rq_payload_bytes(req);
 	return BLKPREP_OK;
 }
 

From b131c61d62266eb21b0f125f63f3d07e5670d726 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:12 +0100
Subject: [PATCH 221/258] nvme: use blk_rq_payload_bytes

The new blk_rq_payload_bytes generalizes the payload length hacks
that nvme_map_len did before.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/fc.c   |  5 ++---
 drivers/nvme/host/nvme.h |  8 --------
 drivers/nvme/host/pci.c  | 19 ++++++++-----------
 drivers/nvme/host/rdma.c | 13 +++++--------
 4 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index aa0bc60810a7..fcc9dcfdf675 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1654,13 +1654,12 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
 		struct nvme_fc_fcp_op *op)
 {
 	struct nvmefc_fcp_req *freq = &op->fcp_req;
-	u32 map_len = nvme_map_len(rq);
 	enum dma_data_direction dir;
 	int ret;
 
 	freq->sg_cnt = 0;
 
-	if (!map_len)
+	if (!blk_rq_payload_bytes(rq))
 		return 0;
 
 	freq->sg_table.sgl = freq->first_sgl;
@@ -1854,7 +1853,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ret)
 		return ret;
 
-	data_len = nvme_map_len(rq);
+	data_len = blk_rq_payload_bytes(rq);
 	if (data_len)
 		io_dir = ((rq_data_dir(rq) == WRITE) ?
 					NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 6377e14586dc..aead6d08ed2c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -225,14 +225,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
-static inline unsigned nvme_map_len(struct request *rq)
-{
-	if (req_op(rq) == REQ_OP_DISCARD)
-		return sizeof(struct nvme_dsm_range);
-	else
-		return blk_rq_bytes(rq);
-}
-
 static inline void nvme_cleanup_cmd(struct request *req)
 {
 	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 19beeb7b2ac2..3faefabf339c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -306,11 +306,11 @@ static __le64 **iod_list(struct request *req)
 	return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
-static int nvme_init_iod(struct request *rq, unsigned size,
-		struct nvme_dev *dev)
+static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
 	int nseg = blk_rq_nr_phys_segments(rq);
+	unsigned int size = blk_rq_payload_bytes(rq);
 
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@@ -420,12 +420,11 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 }
 #endif
 
-static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
-		int total_len)
+static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct dma_pool *pool;
-	int length = total_len;
+	int length = blk_rq_payload_bytes(req);
 	struct scatterlist *sg = iod->sg;
 	int dma_len = sg_dma_len(sg);
 	u64 dma_addr = sg_dma_address(sg);
@@ -501,7 +500,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
 }
 
 static int nvme_map_data(struct nvme_dev *dev, struct request *req,
-		unsigned size, struct nvme_command *cmnd)
+		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct request_queue *q = req->q;
@@ -519,7 +518,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 				DMA_ATTR_NO_WARN))
 		goto out;
 
-	if (!nvme_setup_prps(dev, req, size))
+	if (!nvme_setup_prps(dev, req))
 		goto out_unmap;
 
 	ret = BLK_MQ_RQ_QUEUE_ERROR;
@@ -580,7 +579,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_command cmnd;
-	unsigned map_len;
 	int ret = BLK_MQ_RQ_QUEUE_OK;
 
 	/*
@@ -600,13 +598,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
 		return ret;
 
-	map_len = nvme_map_len(req);
-	ret = nvme_init_iod(req, map_len, dev);
+	ret = nvme_init_iod(req, dev);
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
 		goto out_free_cmd;
 
 	if (blk_rq_nr_phys_segments(req))
-		ret = nvme_map_data(dev, req, map_len, &cmnd);
+		ret = nvme_map_data(dev, req, &cmnd);
 
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
 		goto out_cleanup_iod;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 34e564857716..557f29b1f1bb 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -981,8 +981,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 }
 
 static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
-		struct request *rq, unsigned int map_len,
-		struct nvme_command *c)
+		struct request *rq, struct nvme_command *c)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_device *dev = queue->device;
@@ -1014,9 +1013,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	}
 
 	if (count == 1) {
-		if (rq_data_dir(rq) == WRITE &&
-		    map_len <= nvme_rdma_inline_data_size(queue) &&
-		    nvme_rdma_queue_idx(queue))
+		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+		    blk_rq_payload_bytes(rq) <=
+				nvme_rdma_inline_data_size(queue))
 			return nvme_rdma_map_sg_inline(queue, req, c);
 
 		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -1444,7 +1443,6 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *c = sqe->data;
 	bool flush = false;
 	struct ib_device *dev;
-	unsigned int map_len;
 	int ret;
 
 	WARN_ON_ONCE(rq->tag < 0);
@@ -1462,8 +1460,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 
-	map_len = nvme_map_len(rq);
-	ret = nvme_rdma_map_data(queue, rq, map_len, c);
+	ret = nvme_rdma_map_data(queue, rq, c);
 	if (ret < 0) {
 		dev_err(queue->ctrl->ctrl.device,
 			     "Failed to map data (%d)\n", ret);

From f80de881d8df967488b7343381619efa15019493 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:13 +0100
Subject: [PATCH 222/258] sd: remove __data_len hack for WRITE SAME

Now that we have the blk_rq_payload_bytes helper available to determine
the actual I/O size we don't need to mess around with __data_len for
WRITE SAME.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/scsi/sd.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b1933041da39..1fbb1ecf49f2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -836,7 +836,6 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 	struct bio *bio = rq->bio;
 	sector_t sector = blk_rq_pos(rq);
 	unsigned int nr_sectors = blk_rq_sectors(rq);
-	unsigned int nr_bytes = blk_rq_bytes(rq);
 	int ret;
 
 	if (sdkp->device->no_write_same)
@@ -869,21 +868,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 
 	cmd->transfersize = sdp->sector_size;
 	cmd->allowed = SD_MAX_RETRIES;
-
-	/*
-	 * For WRITE_SAME the data transferred in the DATA IN buffer is
-	 * different from the amount of data actually written to the target.
-	 *
-	 * We set up __data_len to the amount of data transferred from the
-	 * DATA IN buffer so that blk_rq_map_sg set up the proper S/G list
-	 * to transfer a single sector of data first, but then reset it to
-	 * the amount of data to be written right after so that the I/O path
-	 * knows how much to actually write.
-	 */
-	rq->__data_len = sdp->sector_size;
-	ret = scsi_init_io(cmd);
-	rq->__data_len = nr_bytes;
-	return ret;
+	return scsi_init_io(cmd);
 }
 
 static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd)

From bef13315e990fd3d3fb4c39013aefd53f06c3657 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 15:18:16 -0700
Subject: [PATCH 223/258] block: don't try to discard from
 __blkdev_issue_zeroout

Discard can return -EIO asynchronously if the alignment for the request
isn't suitable for the driver, which makes a proper fallback to other
methods in __blkdev_issue_zeroout impossible.  Thus only issue a sync
discard from blkdev_issue_zeroout an don't try discard at all from
__blkdev_issue_zeroout as a non-invasive workaround.

One more reason why abusing discard for zeroing must die..

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Eryu Guan <eguan@redhat.com>
Fixes: e73c23ff ("block: add async variant of blkdev_issue_zeroout")
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-lib.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index ed89c8f4b2a0..f8c82a9b4012 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -301,13 +301,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	if ((sector | nr_sects) & bs_mask)
 		return -EINVAL;
 
-	if (discard) {
-		ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
-				BLKDEV_DISCARD_ZERO, biop);
-		if (ret == 0 || (ret && ret != -EOPNOTSUPP))
-			goto out;
-	}
-
 	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
 			biop);
 	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
@@ -370,6 +363,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	struct bio *bio = NULL;
 	struct blk_plug plug;
 
+	if (discard) {
+		if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
+				BLKDEV_DISCARD_ZERO))
+			return 0;
+	}
+
 	blk_start_plug(&plug);
 	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
 			&bio, discard);

From 695085b4bc7603551db0b3da897b8bf9893ca218 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 13 Jan 2017 01:11:18 -0500
Subject: [PATCH 224/258] x86/tsc: Add the Intel Denverton Processor to
 native_calibrate_tsc()

The Intel Denverton microserver uses a 25 MHz TSC crystal,
so we can derive its exact [*] TSC frequency
using CPUID and some arithmetic, eg.:

  TSC: 1800 MHz (25000000 Hz * 216 / 3 / 1000000)

[*] 'exact' is only as good as the crystal, which should be +/- 20ppm

Signed-off-by: Len Brown <len.brown@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/306899f94804aece6d8fa8b4223ede3b48dbb59c.1484287748.git.len.brown@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/tsc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index be3a49ee0356..e41af597aed8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void)
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
 		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_ATOM_DENVERTON:
 			crystal_khz = 25000;	/* 25.0 MHz */
 			break;
 		case INTEL_FAM6_ATOM_GOLDMONT:

From 453828625731d0ba7218242ef6ec88f59408f368 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Thu, 12 Jan 2017 16:53:11 +0100
Subject: [PATCH 225/258] x86/mpx: Use compatible types in comparison to fix
 sparse error

info->si_addr is of type void __user *, so it should be compared against
something from the same address space.

This fixes the following sparse error:

  arch/x86/mm/mpx.c:296:27: error: incompatible types in comparison expression (different address spaces)

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/mpx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 324e5713d386..af59f808742f 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 	 * We were not able to extract an address from the instruction,
 	 * probably because there was something invalid in it.
 	 */
-	if (info->si_addr == (void *)-1) {
+	if (info->si_addr == (void __user *)-1) {
 		err = -EINVAL;
 		goto err_out;
 	}

From 63cae12bce9861cec309798d34701cf3da20bc71 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 9 Dec 2016 14:59:00 +0100
Subject: [PATCH 226/258] perf/core: Fix sys_perf_event_open() vs. hotplug

There is problem with installing an event in a task that is 'stuck' on
an offline CPU.

Blocked tasks are not dis-assosciated from offlined CPUs, after all, a
blocked task doesn't run and doesn't require a CPU etc.. Only on
wakeup do we ammend the situation and place the task on a available
CPU.

If we hit such a task with perf_install_in_context() we'll loop until
either that task wakes up or the CPU comes back online, if the task
waking depends on the event being installed, we're stuck.

While looking into this issue, I also spotted another problem, if we
hit a task with perf_install_in_context() that is in the middle of
being migrated, that is we observe the old CPU before sending the IPI,
but run the IPI (on the old CPU) while the task is already running on
the new CPU, things also go sideways.

Rework things to rely on task_curr() -- outside of rq->lock -- which
is rather tricky. Imagine the following scenario where we're trying to
install the first event into our task 't':

CPU0            CPU1            CPU2

                (current == t)

t->perf_event_ctxp[] = ctx;
smp_mb();
cpu = task_cpu(t);

                switch(t, n);
                                migrate(t, 2);
                                switch(p, t);

                                ctx = t->perf_event_ctxp[]; // must not be NULL

smp_function_call(cpu, ..);

                generic_exec_single()
                  func();
                    spin_lock(ctx->lock);
                    if (task_curr(t)) // false

                    add_event_to_ctx();
                    spin_unlock(ctx->lock);

                                perf_event_context_sched_in();
                                  spin_lock(ctx->lock);
                                  // sees event

So its CPU0's store of t->perf_event_ctxp[] that must not go 'missing'.
Because if CPU2's load of that variable were to observe NULL, it would
not try to schedule the ctx and we'd have a task running without its
counter, which would be 'bad'.

As long as we observe !NULL, we'll acquire ctx->lock. If we acquire it
first and not see the event yet, then CPU0 must observe task_curr()
and retry. If the install happens first, then we must see the event on
sched-in and all is well.

I think we can translate the first part (until the 'must not be NULL')
of the scenario to a litmus test like:

  C C-peterz

  {
  }

  P0(int *x, int *y)
  {
          int r1;

          WRITE_ONCE(*x, 1);
          smp_mb();
          r1 = READ_ONCE(*y);
  }

  P1(int *y, int *z)
  {
          WRITE_ONCE(*y, 1);
          smp_store_release(z, 1);
  }

  P2(int *x, int *z)
  {
          int r1;
          int r2;

          r1 = smp_load_acquire(z);
	  smp_mb();
          r2 = READ_ONCE(*x);
  }

  exists
  (0:r1=0 /\ 2:r1=1 /\ 2:r2=0)

Where:
  x is perf_event_ctxp[],
  y is our tasks's CPU, and
  z is our task being placed on the rq of CPU2.

The P0 smp_mb() is the one added by this patch, ordering the store to
perf_event_ctxp[] from find_get_context() and the load of task_cpu()
in task_function_call().

The smp_store_release/smp_load_acquire model the RCpc locking of the
rq->lock and the smp_mb() of P2 is the context switch switching from
whatever CPU2 was running to our task 't'.

This litmus test evaluates into:

  Test C-peterz Allowed
  States 7
  0:r1=0; 2:r1=0; 2:r2=0;
  0:r1=0; 2:r1=0; 2:r2=1;
  0:r1=0; 2:r1=1; 2:r2=1;
  0:r1=1; 2:r1=0; 2:r2=0;
  0:r1=1; 2:r1=0; 2:r2=1;
  0:r1=1; 2:r1=1; 2:r2=0;
  0:r1=1; 2:r1=1; 2:r2=1;
  No
  Witnesses
  Positive: 0 Negative: 7
  Condition exists (0:r1=0 /\ 2:r1=1 /\ 2:r2=0)
  Observation C-peterz Never 0 7
  Hash=e427f41d9146b2a5445101d3e2fcaa34

And the strong and weak model agree.

Reported-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Will Deacon <will.deacon@arm.com>
Cc: jeremy.linton@arm.com
Link: http://lkml.kernel.org/r/20161209135900.GU3174@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 70 ++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 22 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab15509fab8c..72ce7d63e561 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2249,7 +2249,7 @@ static int  __perf_install_in_context(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	bool activate = true;
+	bool reprogram = true;
 	int ret = 0;
 
 	raw_spin_lock(&cpuctx->ctx.lock);
@@ -2257,27 +2257,26 @@ static int  __perf_install_in_context(void *info)
 		raw_spin_lock(&ctx->lock);
 		task_ctx = ctx;
 
-		/* If we're on the wrong CPU, try again */
-		if (task_cpu(ctx->task) != smp_processor_id()) {
+		reprogram = (ctx->task == current);
+
+		/*
+		 * If the task is running, it must be running on this CPU,
+		 * otherwise we cannot reprogram things.
+		 *
+		 * If its not running, we don't care, ctx->lock will
+		 * serialize against it becoming runnable.
+		 */
+		if (task_curr(ctx->task) && !reprogram) {
 			ret = -ESRCH;
 			goto unlock;
 		}
 
-		/*
-		 * If we're on the right CPU, see if the task we target is
-		 * current, if not we don't have to activate the ctx, a future
-		 * context switch will do that for us.
-		 */
-		if (ctx->task != current)
-			activate = false;
-		else
-			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
-
+		WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
 	} else if (task_ctx) {
 		raw_spin_lock(&task_ctx->lock);
 	}
 
-	if (activate) {
+	if (reprogram) {
 		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
 		ctx_resched(cpuctx, task_ctx);
@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
 	/*
 	 * Installing events is tricky because we cannot rely on ctx->is_active
 	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 *
+	 * Instead we use task_curr(), which tells us if the task is running.
+	 * However, since we use task_curr() outside of rq::lock, we can race
+	 * against the actual state. This means the result can be wrong.
+	 *
+	 * If we get a false positive, we retry, this is harmless.
+	 *
+	 * If we get a false negative, things are complicated. If we are after
+	 * perf_event_context_sched_in() ctx::lock will serialize us, and the
+	 * value must be correct. If we're before, it doesn't matter since
+	 * perf_event_context_sched_in() will program the counter.
+	 *
+	 * However, this hinges on the remote context switch having observed
+	 * our task->perf_event_ctxp[] store, such that it will in fact take
+	 * ctx::lock in perf_event_context_sched_in().
+	 *
+	 * We do this by task_function_call(), if the IPI fails to hit the task
+	 * we know any future context switch of task must see the
+	 * perf_event_ctpx[] store.
 	 */
-again:
+
 	/*
-	 * Cannot use task_function_call() because we need to run on the task's
-	 * CPU regardless of whether its current or not.
+	 * This smp_mb() orders the task->perf_event_ctxp[] store with the
+	 * task_cpu() load, such that if the IPI then does not find the task
+	 * running, a future context switch of that task must observe the
+	 * store.
 	 */
-	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+	smp_mb();
+again:
+	if (!task_function_call(task, __perf_install_in_context, event))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -2348,12 +2370,16 @@ again:
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
-	raw_spin_unlock_irq(&ctx->lock);
 	/*
-	 * Since !ctx->is_active doesn't mean anything, we must IPI
-	 * unconditionally.
+	 * If the task is not running, ctx->lock will avoid it becoming so,
+	 * thus we can safely install the event.
 	 */
-	goto again;
+	if (task_curr(task)) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	add_event_to_ctx(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*

From 321027c1fe77f892f4ea07846aeae08cefbbb290 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 11 Jan 2017 21:09:50 +0100
Subject: [PATCH 227/258] perf/core: Fix concurrent sys_perf_event_open() vs.
 'move_group' race

Di Shen reported a race between two concurrent sys_perf_event_open()
calls where both try and move the same pre-existing software group
into a hardware context.

The problem is exactly that described in commit:

  f63a8daa5812 ("perf: Fix event->ctx locking")

... where, while we wait for a ctx->mutex acquisition, the event->ctx
relation can have changed under us.

That very same commit failed to recognise sys_perf_event_context() as an
external access vector to the events and thereby didn't apply the
established locking rules correctly.

So while one sys_perf_event_open() call is stuck waiting on
mutex_lock_double(), the other (which owns said locks) moves the group
about. So by the time the former sys_perf_event_open() acquires the
locks, the context we've acquired is stale (and possibly dead).

Apply the established locking rules as per perf_event_ctx_lock_nested()
to the mutex_lock_double() for the 'move_group' case. This obviously means
we need to validate state after we acquire the locks.

Reported-by: Di Shen (Keen Lab)
Tested-by: John Dias <joaodias@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Min Chong <mchong@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: f63a8daa5812 ("perf: Fix event->ctx locking")
Link: http://lkml.kernel.org/r/20170106131444.GZ3174@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 58 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 72ce7d63e561..cbc5937265da 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9529,6 +9529,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 	return 0;
 }
 
+/*
+ * Variation on perf_event_ctx_lock_nested(), except we take two context
+ * mutexes.
+ */
+static struct perf_event_context *
+__perf_event_ctx_lock_double(struct perf_event *group_leader,
+			     struct perf_event_context *ctx)
+{
+	struct perf_event_context *gctx;
+
+again:
+	rcu_read_lock();
+	gctx = READ_ONCE(group_leader->ctx);
+	if (!atomic_inc_not_zero(&gctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+	if (group_leader->ctx != gctx) {
+		mutex_unlock(&ctx->mutex);
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+		goto again;
+	}
+
+	return gctx;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -9772,12 +9803,31 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		gctx = group_leader->ctx;
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		gctx = __perf_event_ctx_lock_double(group_leader, ctx);
+
 		if (gctx->task == TASK_TOMBSTONE) {
 			err = -ESRCH;
 			goto err_locked;
 		}
+
+		/*
+		 * Check if we raced against another sys_perf_event_open() call
+		 * moving the software group underneath us.
+		 */
+		if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
+			/*
+			 * If someone moved the group out from under us, check
+			 * if this new event wound up on the same ctx, if so
+			 * its the regular !move_group case, otherwise fail.
+			 */
+			if (gctx != ctx) {
+				err = -EINVAL;
+				goto err_locked;
+			} else {
+				perf_event_ctx_unlock(group_leader, gctx);
+				move_group = 0;
+			}
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
@@ -9879,7 +9929,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 
 	if (task) {
@@ -9905,7 +9955,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 err_locked:
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 /* err_file: */
 	fput(event_file);

From 475113d937adfd150eb82b5e2c5507125a68e7af Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 28 Dec 2016 14:31:03 +0100
Subject: [PATCH 228/258] perf/x86/intel: Account interrupts for PEBS errors

It's possible to set up PEBS events to get only errors and not
any data, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lock up, because the error path of the
intel_pmu_drain_pebs_nhm() does not account event->hw.interrupt
for error PEBS interrupts, so in case you're getting ONLY
errors you don't have a way to stop the event when it's over
the max_samples_per_tick limit:

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25

Add perf_event_account_interrupt() which does the interrupt
and frequency checks and call it from intel_pmu_drain_pebs_nhm()'s
error path.

We keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only if
there's any data to deliver.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/ds.c |  6 ++++-
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 47 +++++++++++++++++++++++++-------------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..9dfeeeca0ea8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cbc5937265da..110b38a58493 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
 			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events

From 18e7a45af91acdde99d3aa1372cc40e1f8142f7b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 3 Jan 2017 15:24:54 +0100
Subject: [PATCH 229/258] perf/x86: Reject non sampling events with precise_ip

As Peter suggested [1] rejecting non sampling PEBS events,
because they dont make any sense and could cause bugs
in the NMI handler [2].

  [1] http://lkml.kernel.org/r/20170103094059.GC3093@worktop
  [2] http://lkml.kernel.org/r/1482931866-6018-3-git-send-email-jolsa@kernel.org

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20170103142454.GA26251@krava
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 019c5887b698..1635c0c8df23 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
+
+		/* There's no sense in having PEBS for non sampling events: */
+		if (!is_sampling_event(event))
+			return -EINVAL;
 	}
 	/*
 	 * check that PEBS LBR correction does not conflict with

From c7334ce814f7e5d8fc1f9b3126cda0640c2f81b3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 14 Jan 2017 14:09:03 +0100
Subject: [PATCH 230/258] Revert "driver core: Add deferred_probe attribute to
 devices in sysfs"

This reverts commit 6751667a29d6fd64afb9ce30567ad616b68ed789.

Rob Herring objected to it, and a replacement for it will be added using
debugfs in the future.

Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Reported-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-devices-deferred_probe        | 12 ------------
 drivers/base/base.h                                 |  2 --
 drivers/base/core.c                                 |  7 -------
 drivers/base/dd.c                                   | 13 -------------
 4 files changed, 34 deletions(-)
 delete mode 100644 Documentation/ABI/testing/sysfs-devices-deferred_probe

diff --git a/Documentation/ABI/testing/sysfs-devices-deferred_probe b/Documentation/ABI/testing/sysfs-devices-deferred_probe
deleted file mode 100644
index 58553d7a321f..000000000000
--- a/Documentation/ABI/testing/sysfs-devices-deferred_probe
+++ /dev/null
@@ -1,12 +0,0 @@
-What:		/sys/devices/.../deferred_probe
-Date:		August 2016
-Contact:	Ben Hutchings <ben.hutchings@codethink.co.uk>
-Description:
-		The /sys/devices/.../deferred_probe attribute is
-		present for all devices.  If a driver detects during
-		probing a device that a related device is not yet
-		ready, it may defer probing of the first device.  The
-		kernel will retry probing the first device after any
-		other device is successfully probed.  This attribute
-		reads as 1 if probing of this device is currently
-		deferred, or 0 otherwise.
diff --git a/drivers/base/base.h b/drivers/base/base.h
index ada9dce34e6d..e19b1008e5fb 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -141,8 +141,6 @@ extern void device_unblock_probing(void);
 extern struct kset *devices_kset;
 extern void devices_kset_move_last(struct device *dev);
 
-extern struct device_attribute dev_attr_deferred_probe;
-
 #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS)
 extern void module_add_driver(struct module *mod, struct device_driver *drv);
 extern void module_remove_driver(struct device_driver *drv);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 020ea7f05520..8c25e68e67d7 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1060,14 +1060,8 @@ static int device_add_attrs(struct device *dev)
 			goto err_remove_dev_groups;
 	}
 
-	error = device_create_file(dev, &dev_attr_deferred_probe);
-	if (error)
-		goto err_remove_online;
-
 	return 0;
 
- err_remove_online:
-	device_remove_file(dev, &dev_attr_online);
  err_remove_dev_groups:
 	device_remove_groups(dev, dev->groups);
  err_remove_type_groups:
@@ -1085,7 +1079,6 @@ static void device_remove_attrs(struct device *dev)
 	struct class *class = dev->class;
 	const struct device_type *type = dev->type;
 
-	device_remove_file(dev, &dev_attr_deferred_probe);
 	device_remove_file(dev, &dev_attr_online);
 	device_remove_groups(dev, dev->groups);
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index a8b258e5407b..a1fbf55c4d3a 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -53,19 +53,6 @@ static LIST_HEAD(deferred_probe_pending_list);
 static LIST_HEAD(deferred_probe_active_list);
 static atomic_t deferred_trigger_count = ATOMIC_INIT(0);
 
-static ssize_t deferred_probe_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	bool value;
-
-	mutex_lock(&deferred_probe_mutex);
-	value = !list_empty(&dev->p->deferred_probe);
-	mutex_unlock(&deferred_probe_mutex);
-
-	return sprintf(buf, "%d\n", value);
-}
-DEVICE_ATTR_RO(deferred_probe);
-
 /*
  * In some cases, like suspend to RAM or hibernation, It might be reasonable
  * to prohibit probing of devices as it could be unsafe.

From 0100a3e67a9cef64d72cd3a1da86f3ddbee50363 Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Mon, 12 Dec 2016 18:42:28 -0500
Subject: [PATCH 231/258] efi/x86: Prune invalid memory map entries and fix
 boot regression

Some machines, such as the Lenovo ThinkPad W541 with firmware GNET80WW
(2.28), include memory map entries with phys_addr=0x0 and num_pages=0.

These machines fail to boot after the following commit,

  commit 8e80632fb23f ("efi/esrt: Use efi_mem_reserve() and avoid a kmalloc()")

Fix this by removing such bogus entries from the memory map.

Furthermore, currently the log output for this case (with efi=debug)
looks like:

 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |  |  |  |  |  ] range=[0x0000000000000000-0xffffffffffffffff] (0MB)

This is clearly wrong, and also not as informative as it could be.  This
patch changes it so that if we find obviously invalid memory map
entries, we print an error and skip those entries.  It also detects the
display of the address range calculation overflow, so the new output is:

 [    0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries:
 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |   |  |  |  |  ] range=[0x0000000000000000-0x0000000000000000] (invalid)

It also detects memory map sizes that would overflow the physical
address, for example phys_addr=0xfffffffffffff000 and
num_pages=0x0200000000000001, and prints:

 [    0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries:
 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |   |  |  |  |  ] range=[phys_addr=0xfffffffffffff000-0x20ffffffffffffffff] (invalid)

It then removes these entries from the memory map.

Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[ardb: refactor for clarity with no functional changes, avoid PAGE_SHIFT]
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
[Matt: Include bugzilla info in commit log]
Cc: <stable@vger.kernel.org> # v4.9+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=191121
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/efi.c | 66 +++++++++++++++++++++++++++++++++++++
 include/linux/efi.h         |  1 +
 2 files changed, 67 insertions(+)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 936a488d6cf6..274dfc481849 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -210,6 +210,70 @@ int __init efi_memblock_x86_reserve_range(void)
 	return 0;
 }
 
+#define OVERFLOW_ADDR_SHIFT	(64 - EFI_PAGE_SHIFT)
+#define OVERFLOW_ADDR_MASK	(U64_MAX << OVERFLOW_ADDR_SHIFT)
+#define U64_HIGH_BIT		(~(U64_MAX >> 1))
+
+static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i)
+{
+	u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1;
+	u64 end_hi = 0;
+	char buf[64];
+
+	if (md->num_pages == 0) {
+		end = 0;
+	} else if (md->num_pages > EFI_PAGES_MAX ||
+		   EFI_PAGES_MAX - md->num_pages <
+		   (md->phys_addr >> EFI_PAGE_SHIFT)) {
+		end_hi = (md->num_pages & OVERFLOW_ADDR_MASK)
+			>> OVERFLOW_ADDR_SHIFT;
+
+		if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT))
+			end_hi += 1;
+	} else {
+		return true;
+	}
+
+	pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n");
+
+	if (end_hi) {
+		pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n",
+			i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			md->phys_addr, end_hi, end);
+	} else {
+		pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n",
+			i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			md->phys_addr, end);
+	}
+	return false;
+}
+
+static void __init efi_clean_memmap(void)
+{
+	efi_memory_desc_t *out = efi.memmap.map;
+	const efi_memory_desc_t *in = out;
+	const efi_memory_desc_t *end = efi.memmap.map_end;
+	int i, n_removal;
+
+	for (i = n_removal = 0; in < end; i++) {
+		if (efi_memmap_entry_valid(in, i)) {
+			if (out != in)
+				memcpy(out, in, efi.memmap.desc_size);
+			out = (void *)out + efi.memmap.desc_size;
+		} else {
+			n_removal++;
+		}
+		in = (void *)in + efi.memmap.desc_size;
+	}
+
+	if (n_removal > 0) {
+		u64 size = efi.memmap.nr_map - n_removal;
+
+		pr_warn("Removing %d invalid memory map entries.\n", n_removal);
+		efi_memmap_install(efi.memmap.phys_map, size);
+	}
+}
+
 void __init efi_print_memmap(void)
 {
 	efi_memory_desc_t *md;
@@ -472,6 +536,8 @@ void __init efi_init(void)
 		}
 	}
 
+	efi_clean_memmap();
+
 	if (efi_enabled(EFI_DBG))
 		efi_print_memmap();
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0c5420208c40..5b1af30ece55 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -103,6 +103,7 @@ typedef	struct {
 
 #define EFI_PAGE_SHIFT		12
 #define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
+#define EFI_PAGES_MAX		(U64_MAX >> EFI_PAGE_SHIFT)
 
 typedef struct {
 	u32 type;

From a12f1ae61c489076a9aeb90bddca7722bf330df3 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Tue, 13 Dec 2016 12:09:56 -0800
Subject: [PATCH 232/258] aio: fix lock dep warning

lockdep reports a warnning. file_start_write/file_end_write only
acquire/release the lock for regular files. So checking the files in aio
side too.

[  453.532141] ------------[ cut here ]------------
[  453.533011] WARNING: CPU: 1 PID: 1298 at ../kernel/locking/lockdep.c:3514 lock_release+0x434/0x670
[  453.533011] DEBUG_LOCKS_WARN_ON(depth <= 0)
[  453.533011] Modules linked in:
[  453.533011] CPU: 1 PID: 1298 Comm: fio Not tainted 4.9.0+ #964
[  453.533011] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 04/01/2014
[  453.533011]  ffff8803a24b7a70 ffffffff8196cffb ffff8803a24b7ae8 0000000000000000
[  453.533011]  ffff8803a24b7ab8 ffffffff81091ee1 ffff8803a5dba700 00000dba00000008
[  453.533011]  ffffed0074496f59 ffff8803a5dbaf54 ffff8803ae0f8488 fffffffffffffdef
[  453.533011] Call Trace:
[  453.533011]  [<ffffffff8196cffb>] dump_stack+0x67/0x9c
[  453.533011]  [<ffffffff81091ee1>] __warn+0x111/0x130
[  453.533011]  [<ffffffff81091f97>] warn_slowpath_fmt+0x97/0xb0
[  453.533011]  [<ffffffff81091f00>] ? __warn+0x130/0x130
[  453.533011]  [<ffffffff8191b789>] ? blk_finish_plug+0x29/0x60
[  453.533011]  [<ffffffff811205d4>] lock_release+0x434/0x670
[  453.533011]  [<ffffffff8198af94>] ? import_single_range+0xd4/0x110
[  453.533011]  [<ffffffff81322195>] ? rw_verify_area+0x65/0x140
[  453.533011]  [<ffffffff813aa696>] ? aio_write+0x1f6/0x280
[  453.533011]  [<ffffffff813aa6c9>] aio_write+0x229/0x280
[  453.533011]  [<ffffffff813aa4a0>] ? aio_complete+0x640/0x640
[  453.533011]  [<ffffffff8111df20>] ? debug_check_no_locks_freed+0x1a0/0x1a0
[  453.533011]  [<ffffffff8114793a>] ? debug_lockdep_rcu_enabled.part.2+0x1a/0x30
[  453.533011]  [<ffffffff81147985>] ? debug_lockdep_rcu_enabled+0x35/0x40
[  453.533011]  [<ffffffff812a92be>] ? __might_fault+0x7e/0xf0
[  453.533011]  [<ffffffff813ac9bc>] do_io_submit+0x94c/0xb10
[  453.533011]  [<ffffffff813ac2ae>] ? do_io_submit+0x23e/0xb10
[  453.533011]  [<ffffffff813ac070>] ? SyS_io_destroy+0x270/0x270
[  453.533011]  [<ffffffff8111d7b3>] ? mark_held_locks+0x23/0xc0
[  453.533011]  [<ffffffff8100201a>] ? trace_hardirqs_on_thunk+0x1a/0x1c
[  453.533011]  [<ffffffff813acb90>] SyS_io_submit+0x10/0x20
[  453.533011]  [<ffffffff824f96aa>] entry_SYSCALL_64_fastpath+0x18/0xad
[  453.533011]  [<ffffffff81119190>] ? trace_hardirqs_off_caller+0xc0/0x110
[  453.533011] ---[ end trace b2fbe664d1cc0082 ]---

Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 4ab67e8cb776..873b4ca82ccb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1085,7 +1085,8 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
 		 * Tell lockdep we inherited freeze protection from submission
 		 * thread.
 		 */
-		__sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+		if (S_ISREG(file_inode(file)->i_mode))
+			__sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 		file_end_write(file);
 	}
 
@@ -1525,7 +1526,8 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 		 * by telling it the lock got released so that it doesn't
 		 * complain about held lock when we return to userspace.
 		 */
-		__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+		if (S_ISREG(file_inode(file)->i_mode))
+			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 	}
 	kfree(iovec);
 	return ret;

From 4d22c75d4c7b5c5f4bd31054f09103ee490878fd Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Wed, 11 Jan 2017 13:25:00 -0600
Subject: [PATCH 233/258] coredump: Ensure proper size of sparse core files

If the last section of a core file ends with an unmapped or zero page,
the size of the file does not correspond with the last dump_skip() call.
gdb complains that the file is truncated and can be confusing to users.

After all of the vma sections are written, make sure that the file size
is no smaller than the current file position.

This problem can be demonstrated with gdb's bigcore testcase on the
sparc architecture.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c          |  1 +
 fs/coredump.c            | 18 ++++++++++++++++++
 include/linux/coredump.h |  1 +
 3 files changed, 20 insertions(+)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 29a02daf08a9..422370293cfd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2298,6 +2298,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 				goto end_coredump;
 		}
 	}
+	dump_truncate(cprm);
 
 	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
diff --git a/fs/coredump.c b/fs/coredump.c
index e525b6017cdf..ae6b05629ca1 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -833,3 +833,21 @@ int dump_align(struct coredump_params *cprm, int align)
 	return mod ? dump_skip(cprm, align - mod) : 1;
 }
 EXPORT_SYMBOL(dump_align);
+
+/*
+ * Ensures that file size is big enough to contain the current file
+ * postion. This prevents gdb from complaining about a truncated file
+ * if the last "write" to the file was dump_skip.
+ */
+void dump_truncate(struct coredump_params *cprm)
+{
+	struct file *file = cprm->file;
+	loff_t offset;
+
+	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+		offset = file->f_op->llseek(file, 0, SEEK_CUR);
+		if (i_size_read(file->f_mapping->host) < offset)
+			do_truncate(file->f_path.dentry, offset, 0, file);
+	}
+}
+EXPORT_SYMBOL(dump_truncate);
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d016a121a8c4..28ffa94aed6b 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -14,6 +14,7 @@ struct coredump_params;
 extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 extern int dump_align(struct coredump_params *cprm, int align);
+extern void dump_truncate(struct coredump_params *cprm);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(const siginfo_t *siginfo);
 #else

From b9dc6f65bc5e232d1c05fe34b5daadc7e8bbf1fb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 14 Jan 2017 19:33:08 -0500
Subject: [PATCH 234/258] fix a fencepost error in pipe_advance()

The logics in pipe_advance() used to release all buffers past the new
position failed in cases when the number of buffers to release was equal
to pipe->buffers.  If that happened, none of them had been released,
leaving pipe full.  Worse, it was trivial to trigger and we end up with
pipe full of uninitialized pages.  IOW, it's an infoleak.

Cc: stable@vger.kernel.org # v4.9
Reported-by: "Alan J. Wylie" <alan@wylie.me.uk>
Tested-by: "Alan J. Wylie" <alan@wylie.me.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/iov_iter.c | 66 ++++++++++++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 25f572303801..e68604ae3ced 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -730,43 +730,50 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-static void pipe_advance(struct iov_iter *i, size_t size)
+static inline void pipe_truncate(struct iov_iter *i)
 {
 	struct pipe_inode_info *pipe = i->pipe;
-	struct pipe_buffer *buf;
-	int idx = i->idx;
-	size_t off = i->iov_offset, orig_sz;
-	
-	if (unlikely(i->count < size))
-		size = i->count;
-	orig_sz = size;
-
-	if (size) {
-		if (off) /* make it relative to the beginning of buffer */
-			size += off - pipe->bufs[idx].offset;
-		while (1) {
-			buf = &pipe->bufs[idx];
-			if (size <= buf->len)
-				break;
-			size -= buf->len;
-			idx = next_idx(idx, pipe);
-		}
-		buf->len = size;
-		i->idx = idx;
-		off = i->iov_offset = buf->offset + size;
-	}
-	if (off)
-		idx = next_idx(idx, pipe);
 	if (pipe->nrbufs) {
-		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
-		/* [curbuf,unused) is in use.  Free [idx,unused) */
-		while (idx != unused) {
+		size_t off = i->iov_offset;
+		int idx = i->idx;
+		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
+		if (off) {
+			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
+			idx = next_idx(idx, pipe);
+			nrbufs++;
+		}
+		while (pipe->nrbufs > nrbufs) {
 			pipe_buf_release(pipe, &pipe->bufs[idx]);
 			idx = next_idx(idx, pipe);
 			pipe->nrbufs--;
 		}
 	}
-	i->count -= orig_sz;
+}
+
+static void pipe_advance(struct iov_iter *i, size_t size)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	if (unlikely(i->count < size))
+		size = i->count;
+	if (size) {
+		struct pipe_buffer *buf;
+		size_t off = i->iov_offset, left = size;
+		int idx = i->idx;
+		if (off) /* make it relative to the beginning of buffer */
+			left += off - pipe->bufs[idx].offset;
+		while (1) {
+			buf = &pipe->bufs[idx];
+			if (left <= buf->len)
+				break;
+			left -= buf->len;
+			idx = next_idx(idx, pipe);
+		}
+		i->idx = idx;
+		i->iov_offset = buf->offset + left;
+	}
+	i->count -= size;
+	/* ... and discard everything past that point */
+	pipe_truncate(i);
 }
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
@@ -826,6 +833,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
 			size_t count)
 {
 	BUG_ON(direction != ITER_PIPE);
+	WARN_ON(pipe->nrbufs == pipe->buffers);
 	i->type = direction;
 	i->pipe = pipe;
 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);

From 602d9858f07c72eab64f5f00e2fae55f9902cfbe Mon Sep 17 00:00:00 2001
From: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
Date: Wed, 11 Jan 2017 21:56:31 +0300
Subject: [PATCH 235/258] swiotlb: ensure that page-sized mappings are
 page-aligned

Some drivers do depend on page mappings to be page aligned.

Swiotlb already enforces such alignment for mappings greater than page,
extend that to page-sized mappings as well.

Without this fix, nvme hits BUG() in nvme_setup_prps(), because that routine
assumes page-aligned mappings.

Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
---
 lib/swiotlb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 975b8fc4f1e1..a8d74a733a38 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -483,11 +483,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
 
 	/*
-	 * For mappings greater than a page, we limit the stride (and
-	 * hence alignment) to a page size.
+	 * For mappings greater than or equal to a page, we limit the stride
+	 * (and hence alignment) to a page size.
 	 */
 	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	if (size > PAGE_SIZE)
+	if (size >= PAGE_SIZE)
 		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
 	else
 		stride = 1;

From 49def1853334396f948dcb4cedb9347abb318df5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 15 Jan 2017 16:21:59 -0800
Subject: [PATCH 236/258] Linux 4.10-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5f1a84735ff6..96e2352d10a8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Roaring Lionus
 
 # *DOCUMENTATION*

From 75f01a4c9cc291ff5cb28ca1216adb163b7a20ee Mon Sep 17 00:00:00 2001
From: Lance Richardson <lrichard@redhat.com>
Date: Thu, 12 Jan 2017 19:33:18 -0500
Subject: [PATCH 237/258] openvswitch: maintain correct checksum state in
 conntrack actions

When executing conntrack actions on skbuffs with checksum mode
CHECKSUM_COMPLETE, the checksum must be updated to account for
header pushes and pulls. Otherwise we get "hw csum failure"
logs similar to this (ICMP packet received on geneve tunnel
via ixgbe NIC):

[  405.740065] genev_sys_6081: hw csum failure
[  405.740106] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G          I     4.10.0-rc3+ #1
[  405.740108] Call Trace:
[  405.740110]  <IRQ>
[  405.740113]  dump_stack+0x63/0x87
[  405.740116]  netdev_rx_csum_fault+0x3a/0x40
[  405.740118]  __skb_checksum_complete+0xcf/0xe0
[  405.740120]  nf_ip_checksum+0xc8/0xf0
[  405.740124]  icmp_error+0x1de/0x351 [nf_conntrack_ipv4]
[  405.740132]  nf_conntrack_in+0xe1/0x550 [nf_conntrack]
[  405.740137]  ? find_bucket.isra.2+0x62/0x70 [openvswitch]
[  405.740143]  __ovs_ct_lookup+0x95/0x980 [openvswitch]
[  405.740145]  ? netif_rx_internal+0x44/0x110
[  405.740149]  ovs_ct_execute+0x147/0x4b0 [openvswitch]
[  405.740153]  do_execute_actions+0x22e/0xa70 [openvswitch]
[  405.740157]  ovs_execute_actions+0x40/0x120 [openvswitch]
[  405.740161]  ovs_dp_process_packet+0x84/0x120 [openvswitch]
[  405.740166]  ovs_vport_receive+0x73/0xd0 [openvswitch]
[  405.740168]  ? udp_rcv+0x1a/0x20
[  405.740170]  ? ip_local_deliver_finish+0x93/0x1e0
[  405.740172]  ? ip_local_deliver+0x6f/0xe0
[  405.740174]  ? ip_rcv_finish+0x3a0/0x3a0
[  405.740176]  ? ip_rcv_finish+0xdb/0x3a0
[  405.740177]  ? ip_rcv+0x2a7/0x400
[  405.740180]  ? __netif_receive_skb_core+0x970/0xa00
[  405.740185]  netdev_frame_hook+0xd3/0x160 [openvswitch]
[  405.740187]  __netif_receive_skb_core+0x1dc/0xa00
[  405.740194]  ? ixgbe_clean_rx_irq+0x46d/0xa20 [ixgbe]
[  405.740197]  __netif_receive_skb+0x18/0x60
[  405.740199]  netif_receive_skb_internal+0x40/0xb0
[  405.740201]  napi_gro_receive+0xcd/0x120
[  405.740204]  gro_cell_poll+0x57/0x80 [geneve]
[  405.740206]  net_rx_action+0x260/0x3c0
[  405.740209]  __do_softirq+0xc9/0x28c
[  405.740211]  irq_exit+0xd9/0xf0
[  405.740213]  do_IRQ+0x51/0xd0
[  405.740215]  common_interrupt+0x93/0x93

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Signed-off-by: Lance Richardson <lrichard@redhat.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 6b78bab27755..54253ea5976e 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -514,7 +514,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
 	int hooknum, nh_off, err = NF_ACCEPT;
 
 	nh_off = skb_network_offset(skb);
-	skb_pull(skb, nh_off);
+	skb_pull_rcsum(skb, nh_off);
 
 	/* See HOOK2MANIP(). */
 	if (maniptype == NF_NAT_MANIP_SRC)
@@ -579,6 +579,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
 	err = nf_nat_packet(ct, ctinfo, hooknum, skb);
 push:
 	skb_push(skb, nh_off);
+	skb_postpush_rcsum(skb, skb->data, nh_off);
 
 	return err;
 }
@@ -886,7 +887,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 
 	/* The conntrack module expects to be working at L3. */
 	nh_ofs = skb_network_offset(skb);
-	skb_pull(skb, nh_ofs);
+	skb_pull_rcsum(skb, nh_ofs);
 
 	if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
 		err = handle_fragments(net, key, info->zone.id, skb);
@@ -900,6 +901,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 		err = ovs_ct_lookup(net, key, info, skb);
 
 	skb_push(skb, nh_ofs);
+	skb_postpush_rcsum(skb, skb->data, nh_ofs);
 	if (err)
 		kfree_skb(skb);
 	return err;

From ee6ff743e3a4b697e8286054667d7e4e1b56510d Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 12:05:03 +0100
Subject: [PATCH 238/258] mmc: core: Restore parts of the polling policy when
 switch to HS/HS DDR

Regressions for not being able to detect an eMMC HS DDR mode card has been
reported for the sdhci-esdhc-imx driver, but potentially other sdhci
variants may suffer from the similar problem.

The commit e173f8911f09 ("mmc: core: Update CMD13 polling policy when
switch to HS DDR mode"), is causing the problem. It seems that change moved
one step to far, regarding changing the host's timing before polling for a
busy card.

To fix this, let's move back to the behaviour when the host's timing is
updated after the polling, but before the switch status is fetched and
validated.

In cases when polling with CMD13, we keep validating the switch status at
each attempt. However, to align with the other card busy detections
mechanism, let's fetch and validate the switch status also after the host's
timing is updated.

Reported-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Reported-by: Gary Bisson <gary.bisson@boundarydevices.com>
Fixes: e173f8911f09 ("mmc: core: Update CMD13 polling policy when switch..")
Cc: Shawn Lin <shawn.lin@rock-chips.com>
Cc: Dong Aisheng <aisheng.dong@nxp.com>
Cc: Haibo Chen <haibo.chen@nxp.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Tested-by: Jagan Teki <jagan@amarulasolutions.com>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Tested-by: Haibo Chen <haibo.chen@nxp.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
---
 drivers/mmc/core/mmc_ops.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index b11c3455b040..e6ea8503f40c 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -506,9 +506,6 @@ static int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 		}
 	} while (busy);
 
-	if (host->ops->card_busy && send_status)
-		return mmc_switch_status(card);
-
 	return 0;
 }
 
@@ -577,24 +574,26 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 	if (!use_busy_signal)
 		goto out;
 
-	/* Switch to new timing before poll and check switch status. */
-	if (timing)
-		mmc_set_timing(host, timing);
-
 	/*If SPI or used HW busy detection above, then we don't need to poll. */
 	if (((host->caps & MMC_CAP_WAIT_WHILE_BUSY) && use_r1b_resp) ||
-		mmc_host_is_spi(host)) {
-		if (send_status)
-			err = mmc_switch_status(card);
+		mmc_host_is_spi(host))
 		goto out_tim;
-	}
 
 	/* Let's try to poll to find out when the command is completed. */
 	err = mmc_poll_for_busy(card, timeout_ms, send_status, retry_crc_err);
+	if (err)
+		goto out;
 
 out_tim:
-	if (err && timing)
-		mmc_set_timing(host, old_timing);
+	/* Switch to new timing before check switch status. */
+	if (timing)
+		mmc_set_timing(host, timing);
+
+	if (send_status) {
+		err = mmc_switch_status(card);
+		if (err && timing)
+			mmc_set_timing(host, old_timing);
+	}
 out:
 	mmc_retune_release(host);
 

From 8cf699ec849f4ca1413cea01289bd7d37dbcc626 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 13 Jan 2017 08:39:24 -0800
Subject: [PATCH 239/258] mlx4: do not call napi_schedule() without care

Disable BH around the call to napi_schedule() to avoid following warning

[   52.095499] NOHZ: local_softirq_pending 08
[   52.421291] NOHZ: local_softirq_pending 08
[   52.608313] NOHZ: local_softirq_pending 08

Fixes: 8d59de8f7bb3 ("net/mlx4_en: Process all completions in RX rings after port goes up")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Erez Shitrit <erezsh@mellanox.com>
Cc: Eugenia Emantayev <eugenia@mellanox.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4910d9af1933..761f8b12399c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1748,8 +1748,11 @@ int mlx4_en_start_port(struct net_device *dev)
 	/* Process all completions if exist to prevent
 	 * the queues freezing if they are full
 	 */
-	for (i = 0; i < priv->rx_ring_num; i++)
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		local_bh_disable();
 		napi_schedule(&priv->rx_cq[i]->napi);
+		local_bh_enable();
+	}
 
 	netif_tx_start_all_queues(dev);
 	netif_device_attach(dev);

From 8ec3e8a192ba6f13be4522ee81227c792c86fb1a Mon Sep 17 00:00:00 2001
From: Masaru Nagai <masaru.nagai.vx@renesas.com>
Date: Mon, 16 Jan 2017 11:45:21 +0100
Subject: [PATCH 240/258] ravb: do not use zero-length alignment DMA descriptor

Due to alignment requirements of the hardware transmissions are split into
two DMA descriptors, a small padding descriptor of 0 - 3 bytes in length
followed by a descriptor for rest of the packet.

In the case of IP packets the first descriptor will never be zero due to
the way that the stack aligns buffers for IP packets. However, for non-IP
packets it may be zero.

In that case it has been reported that timeouts occur, presumably because
transmission stops at the first zero-length DMA descriptor and thus the
packet is not transmitted. However, in my environment a BUG is triggered as
follows:

[   20.381417] ------------[ cut here ]------------
[   20.386054] kernel BUG at lib/swiotlb.c:495!
[   20.390324] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
[   20.395805] Modules linked in:
[   20.398862] CPU: 0 PID: 2089 Comm: mz Not tainted 4.10.0-rc3-00001-gf13ad2db193f #162
[   20.406689] Hardware name: Renesas Salvator-X board based on r8a7796 (DT)
[   20.413474] task: ffff80063b1f1900 task.stack: ffff80063a71c000
[   20.419404] PC is at swiotlb_tbl_map_single+0x178/0x2ec
[   20.424625] LR is at map_single+0x4c/0x98
[   20.428629] pc : [<ffff00000839c4c0>] lr : [<ffff00000839c680>] pstate: 800001c5
[   20.436019] sp : ffff80063a71f9b0
[   20.439327] x29: ffff80063a71f9b0 x28: ffff80063a20d500
[   20.444636] x27: ffff000008ed5000 x26: 0000000000000000
[   20.449944] x25: 000000067abe2adc x24: 0000000000000000
[   20.455252] x23: 0000000000200000 x22: 0000000000000001
[   20.460559] x21: 0000000000175ffe x20: ffff80063b2a0010
[   20.465866] x19: 0000000000000000 x18: 0000ffffcae6fb20
[   20.471173] x17: 0000ffffa09ba018 x16: ffff0000087c8b70
[   20.476480] x15: 0000ffffa084f588 x14: 0000ffffa09cfa14
[   20.481787] x13: 0000ffffcae87ff0 x12: 000000000063abe2
[   20.487098] x11: ffff000008096360 x10: ffff80063abe2adc
[   20.492407] x9 : 0000000000000000 x8 : 0000000000000000
[   20.497718] x7 : 0000000000000000 x6 : ffff000008ed50d0
[   20.503028] x5 : 0000000000000000 x4 : 0000000000000001
[   20.508338] x3 : 0000000000000000 x2 : 000000067abe2adc
[   20.513648] x1 : 00000000bafff000 x0 : 0000000000000000
[   20.518958]
[   20.520446] Process mz (pid: 2089, stack limit = 0xffff80063a71c000)
[   20.526798] Stack: (0xffff80063a71f9b0 to 0xffff80063a720000)
[   20.532543] f9a0:                                   ffff80063a71fa30 ffff00000839c680
[   20.540374] f9c0: ffff80063b2a0010 ffff80063b2a0010 0000000000000001 0000000000000000
[   20.548204] f9e0: 000000000000006e ffff80063b23c000 ffff80063b23c000 0000000000000000
[   20.556034] fa00: ffff80063b23c000 ffff80063a20d500 000000013b1f1900 0000000000000000
[   20.563864] fa20: ffff80063ffd18e0 ffff80063b2a0010 ffff80063a71fa60 ffff00000839cd10
[   20.571694] fa40: ffff80063b2a0010 0000000000000000 ffff80063ffd18e0 000000067abe2adc
[   20.579524] fa60: ffff80063a71fa90 ffff000008096380 ffff80063b2a0010 0000000000000000
[   20.587353] fa80: 0000000000000000 0000000000000001 ffff80063a71fac0 ffff00000864f770
[   20.595184] faa0: ffff80063b23caf0 0000000000000000 0000000000000000 0000000000000140
[   20.603014] fac0: ffff80063a71fb60 ffff0000087e6498 ffff80063a20d500 ffff80063b23c000
[   20.610843] fae0: 0000000000000000 ffff000008daeaf0 0000000000000000 ffff000008daeb00
[   20.618673] fb00: ffff80063a71fc0c ffff000008da7000 ffff80063b23c090 ffff80063a44f000
[   20.626503] fb20: 0000000000000000 ffff000008daeb00 ffff80063a71fc0c ffff000008da7000
[   20.634333] fb40: ffff80063b23c090 0000000000000000 ffff800600000037 ffff0000087e63d8
[   20.642163] fb60: ffff80063a71fbc0 ffff000008807510 ffff80063a692400 ffff80063a20d500
[   20.649993] fb80: ffff80063a44f000 ffff80063b23c000 ffff80063a69249c 0000000000000000
[   20.657823] fba0: 0000000000000000 ffff80063a087800 ffff80063b23c000 ffff80063a20d500
[   20.665653] fbc0: ffff80063a71fc10 ffff0000087e67dc ffff80063a20d500 ffff80063a692400
[   20.673483] fbe0: ffff80063b23c000 0000000000000000 ffff80063a44f000 ffff80063a69249c
[   20.681312] fc00: ffff80063a5f1a10 000000103a087800 ffff80063a71fc70 ffff0000087e6b24
[   20.689142] fc20: ffff80063a5f1a80 ffff80063a71fde8 000000000000000f 00000000000005ea
[   20.696972] fc40: ffff80063a5f1a10 0000000000000000 000000000000000f ffff00000887fbd0
[   20.704802] fc60: fffffff43a5f1a80 0000000000000000 ffff80063a71fc80 ffff000008880240
[   20.712632] fc80: ffff80063a71fd90 ffff0000087c7a34 ffff80063afc7180 0000000000000000
[   20.720462] fca0: 0000ffffcae6fe18 0000000000000014 0000000060000000 0000000000000015
[   20.728292] fcc0: 0000000000000123 00000000000000ce ffff0000088d2000 ffff80063b1f1900
[   20.736122] fce0: 0000000000008933 ffff000008e7cb80 ffff80063a71fd80 ffff0000087c50a4
[   20.743951] fd00: 0000000000008933 ffff000008e7cb80 ffff000008e7cb80 000000100000000e
[   20.751781] fd20: ffff80063a71fe4c 0000ffff00000300 0000000000000123 0000000000000000
[   20.759611] fd40: 0000000000000000 ffff80063b1f0000 000000000000000e 0000000000000300
[   20.767441] fd60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[   20.775271] fd80: 0000000000000000 0000000000000000 ffff80063a71fda0 ffff0000087c8c20
[   20.783100] fda0: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000
[   20.790930] fdc0: ffffffffffffffff 0000ffffa0903078 0000000000000000 000000001ea87232
[   20.798760] fde0: 000000000000000f ffff80063a71fe40 ffff800600000014 ffff000000000001
[   20.806590] fe00: 0000000000000000 0000000000000000 ffff80063a71fde8 0000000000000000
[   20.814420] fe20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001
[   20.822249] fe40: 0000000203000011 0000000000000000 0000000000000000 ffff80063a68aa00
[   20.830079] fe60: ffff80063a68aa00 0000000000000003 0000000000008933 ffff0000081f1b9c
[   20.837909] fe80: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000
[   20.845739] fea0: ffffffffffffffff 0000ffffa07ca81c 0000000060000000 0000000000000015
[   20.853569] fec0: 0000000000000003 000000001ea87232 000000000000000f 0000000000000000
[   20.861399] fee0: 0000ffffcae6fe18 0000000000000014 0000000000000300 0000000000000000
[   20.869228] ff00: 00000000000000ce 0000000000000000 00000000ffffffff 0000000000000000
[   20.877059] ff20: 0000000000000002 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588
[   20.884888] ff40: 0000000000000000 0000ffffa09ba018 0000ffffcae6fb20 000000001ea87010
[   20.892718] ff60: 0000ffffa09b9000 0000ffffcae6fe30 0000ffffcae6fe18 000000000000000f
[   20.900548] ff80: 0000000000000003 000000001ea87232 0000000000000000 0000000000000000
[   20.908378] ffa0: 0000000000000000 0000ffffcae6fdc0 0000ffffa09a7824 0000ffffcae6fdc0
[   20.916208] ffc0: 0000ffffa0903078 0000000060000000 0000000000000003 00000000000000ce
[   20.924038] ffe0: 0000000000000000 0000000000000000 ffffffffffffffff ffffffffffffffff
[   20.931867] Call trace:
[   20.934312] Exception stack(0xffff80063a71f7e0 to 0xffff80063a71f910)
[   20.940750] f7e0: 0000000000000000 0001000000000000 ffff80063a71f9b0 ffff00000839c4c0
[   20.948580] f800: ffff80063a71f840 ffff00000888a6e4 ffff80063a24c418 ffff80063a24c448
[   20.956410] f820: 0000000000000000 ffff00000811cd54 ffff80063a71f860 ffff80063a24c458
[   20.964240] f840: ffff80063a71f870 ffff00000888b258 ffff80063a24c418 0000000000000001
[   20.972070] f860: ffff80063a71f910 ffff80063a7b7028 ffff80063a71f890 ffff0000088825e4
[   20.979899] f880: 0000000000000000 00000000bafff000 000000067abe2adc 0000000000000000
[   20.987729] f8a0: 0000000000000001 0000000000000000 ffff000008ed50d0 0000000000000000
[   20.995560] f8c0: 0000000000000000 0000000000000000 ffff80063abe2adc ffff000008096360
[   21.003390] f8e0: 000000000063abe2 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588
[   21.011219] f900: ffff0000087c8b70 0000ffffa09ba018
[   21.016097] [<ffff00000839c4c0>] swiotlb_tbl_map_single+0x178/0x2ec
[   21.022362] [<ffff00000839c680>] map_single+0x4c/0x98
[   21.027411] [<ffff00000839cd10>] swiotlb_map_page+0xa4/0x138
[   21.033072] [<ffff000008096380>] __swiotlb_map_page+0x20/0x7c
[   21.038821] [<ffff00000864f770>] ravb_start_xmit+0x174/0x668
[   21.044484] [<ffff0000087e6498>] dev_hard_start_xmit+0x8c/0x120
[   21.050407] [<ffff000008807510>] sch_direct_xmit+0x108/0x1a0
[   21.056064] [<ffff0000087e67dc>] __dev_queue_xmit+0x194/0x4cc
[   21.061807] [<ffff0000087e6b24>] dev_queue_xmit+0x10/0x18
[   21.067214] [<ffff000008880240>] packet_sendmsg+0xf40/0x1220
[   21.072873] [<ffff0000087c7a34>] sock_sendmsg+0x18/0x2c
[   21.078097] [<ffff0000087c8c20>] SyS_sendto+0xb0/0xf0
[   21.083150] [<ffff000008082f30>] el0_svc_naked+0x24/0x28
[   21.088462] Code: d34bfef7 2a1803f3 1a9f86d6 35fff878 (d4210000)
[   21.094611] ---[ end trace 5bc544ad491f3814 ]---
[   21.099234] Kernel panic - not syncing: Fatal exception in interrupt
[   21.105587] Kernel Offset: disabled
[   21.109073] Memory Limit: none
[   21.112126] ---[ end Kernel panic - not syncing: Fatal exception in interrupt

Fixes: 2f45d1902acf ("ravb: minimize TX data copying")
Signed-off-by: Masaru Nagai <masaru.nagai.vx@renesas.com
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 5e5ad978eab9..89ac1e3f6175 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1504,6 +1504,19 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
 		 entry / NUM_TX_DESC * DPTR_ALIGN;
 	len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data;
+	/* Zero length DMA descriptors are problematic as they seem to
+	 * terminate DMA transfers. Avoid them by simply using a length of
+	 * DPTR_ALIGN (4) when skb data is aligned to DPTR_ALIGN.
+	 *
+	 * As skb is guaranteed to have at least ETH_ZLEN (60) bytes of
+	 * data by the call to skb_put_padto() above this is safe with
+	 * respect to both the length of the first DMA descriptor (len)
+	 * overflowing the available data and the length of the second DMA
+	 * descriptor (skb->len - len) being negative.
+	 */
+	if (len == 0)
+		len = DPTR_ALIGN;
+
 	memcpy(buffer, skb->data, len);
 	dma_addr = dma_map_single(ndev->dev.parent, buffer, len, DMA_TO_DEVICE);
 	if (dma_mapping_error(ndev->dev.parent, dma_addr))

From d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Jan 2017 14:20:54 +0100
Subject: [PATCH 241/258] cpmac: remove hopeless #warning

The #warning was present 10 years ago when the driver first got merged.
As the platform is rather obsolete by now, it seems very unlikely that
the warning will cause anyone to fix the code properly.

kernelci.org reports the warning for every build in the meantime, so
I think it's better to just turn it into a code comment to reduce
noise.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 77c88fcf2b86..9b8a30bf939b 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1210,7 +1210,7 @@ int cpmac_init(void)
 		goto fail_alloc;
 	}
 
-#warning FIXME: unhardcode gpio&reset bits
+	/* FIXME: unhardcode gpio&reset bits */
 	ar7_gpio_disable(26);
 	ar7_gpio_disable(27);
 	ar7_device_reset(AR7_RESET_BIT_CPMAC_LO);

From fe68d8bfe59c561664aa87d827aa4b320eb08895 Mon Sep 17 00:00:00 2001
From: Ivan Vecera <cera@cera.cz>
Date: Fri, 13 Jan 2017 22:38:27 +0100
Subject: [PATCH 242/258] be2net: fix status check in be_cmd_pmac_add()

Return value from be_mcc_notify_wait() contains a base completion status
together with an additional status. The base_status() macro need to be
used to access base status.

Fixes: e3a7ae2 be2net: Changing MAC Address of a VF was broken
Cc: Sathya Perla <sathya.perla@broadcom.com>
Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ivan Vecera <cera@cera.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 0e74529a4209..30e855004c57 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1118,7 +1118,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
 err:
 	mutex_unlock(&adapter->mcc_lock);
 
-	 if (status == MCC_STATUS_UNAUTHORIZED_REQUEST)
+	 if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST)
 		status = -EPERM;
 
 	return status;

From 6d928ae590c8d58cfd5cca997d54394de139cbb7 Mon Sep 17 00:00:00 2001
From: Ivan Vecera <cera@cera.cz>
Date: Fri, 13 Jan 2017 22:38:28 +0100
Subject: [PATCH 243/258] be2net: don't delete MAC on close on unprivileged BE3
 VFs

BE3 VFs without FILTMGMT privilege are not allowed to modify its MAC,
VLAN table and UC/MC lists. So don't try to delete MAC on such VFs.

Cc: Sathya Perla <sathya.perla@broadcom.com>
Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ivan Vecera <cera@cera.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index ec010ced6c99..d606e20fe69e 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3609,7 +3609,11 @@ static void be_rx_qs_destroy(struct be_adapter *adapter)
 
 static void be_disable_if_filters(struct be_adapter *adapter)
 {
-	be_dev_mac_del(adapter, adapter->pmac_id[0]);
+	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
+	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
+	    check_privilege(adapter, BE_PRIV_FILTMGMT))
+		be_dev_mac_del(adapter, adapter->pmac_id[0]);
+
 	be_clear_uc_list(adapter);
 	be_clear_mc_list(adapter);
 

From 34393529163af7163ef8459808e3cf2af7db7f16 Mon Sep 17 00:00:00 2001
From: Ivan Vecera <cera@cera.cz>
Date: Fri, 13 Jan 2017 22:38:29 +0100
Subject: [PATCH 244/258] be2net: fix MAC addr setting on privileged BE3 VFs

During interface opening MAC address stored in netdev->dev_addr is
programmed in the HW with exception of BE3 VFs where the initial
MAC is programmed by parent PF. This is OK when MAC address is not
changed when an interfaces is down. In this case the requested MAC is
stored to netdev->dev_addr and later is stored into HW during opening.
But this is not done for all BE3 VFs so the NIC HW does not know
anything about this change and all traffic is filtered.

This is the case of bonding if fail_over_mac == 0 where the MACs of
the slaves are changed while they are down.

The be2net behavior is too restrictive because if a BE3 VF has
the FILTMGMT privilege then it is able to modify its MAC without
any restriction.

To solve the described problem the driver should take care about these
privileged BE3 VFs so the MAC is programmed during opening. And by
contrast unpriviled BE3 VFs should not be allowed to change its MAC
in any case.

Cc: Sathya Perla <sathya.perla@broadcom.com>
Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ivan Vecera <cera@cera.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index d606e20fe69e..1a7f8ad7b9c6 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -318,6 +318,13 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 		return 0;
 
+	/* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC
+	 * address
+	 */
+	if (BEx_chip(adapter) && be_virtfn(adapter) &&
+	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
+		return -EPERM;
+
 	/* if device is not running, copy MAC to netdev->dev_addr */
 	if (!netif_running(netdev))
 		goto done;
@@ -3766,8 +3773,9 @@ static int be_enable_if_filters(struct be_adapter *adapter)
 	if (status)
 		return status;
 
-	/* For BE3 VFs, the PF programs the initial MAC address */
-	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
+	/* Don't add MAC on BE3 VFs without FILTMGMT privilege */
+	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
+	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
 		if (status)
 			return status;

From 1666d49e1d416fcc2cce708242a52fe3317ea8ba Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 12 Jan 2017 21:19:37 +0800
Subject: [PATCH 245/258] mld: do not remove mld souce list info when set link
 down

This is an IPv6 version of commit 24803f38a5c0 ("igmp: do not remove igmp
souce list..."). In mld_del_delrec(), we will restore back all source filter
info instead of flush them.

Move mld_clear_delrec() from ipv6_mc_down() to ipv6_mc_destroy_dev() since
we should not remove source list info when set link down. Remove
igmp6_group_dropped() in ipv6_mc_destroy_dev() since we have called it in
ipv6_mc_down().

Also clear all source info after igmp6_group_dropped() instead of in it
because ipv6_mc_down() will call igmp6_group_dropped().

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 51 ++++++++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 14a3903f1c82..7139fffd61b6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -81,7 +81,7 @@ static void mld_gq_timer_expire(unsigned long data);
 static void mld_ifc_timer_expire(unsigned long data);
 static void mld_ifc_event(struct inet6_dev *idev);
 static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
+static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
 static void mld_clear_delrec(struct inet6_dev *idev);
 static bool mld_in_v1_mode(const struct inet6_dev *idev);
 static int sf_setstate(struct ifmcaddr6 *pmc);
@@ -692,9 +692,9 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 			dev_mc_del(dev, buf);
 	}
 
-	if (mc->mca_flags & MAF_NOREPORT)
-		goto done;
 	spin_unlock_bh(&mc->mca_lock);
+	if (mc->mca_flags & MAF_NOREPORT)
+		return;
 
 	if (!mc->idev->dead)
 		igmp6_leave_group(mc);
@@ -702,8 +702,6 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 	spin_lock_bh(&mc->mca_lock);
 	if (del_timer(&mc->mca_timer))
 		atomic_dec(&mc->mca_refcnt);
-done:
-	ip6_mc_clear_src(mc);
 	spin_unlock_bh(&mc->mca_lock);
 }
 
@@ -748,10 +746,11 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	spin_unlock_bh(&idev->mc_lock);
 }
 
-static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
+static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 {
 	struct ifmcaddr6 *pmc, *pmc_prev;
-	struct ip6_sf_list *psf, *psf_next;
+	struct ip6_sf_list *psf;
+	struct in6_addr *pmca = &im->mca_addr;
 
 	spin_lock_bh(&idev->mc_lock);
 	pmc_prev = NULL;
@@ -768,14 +767,20 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
 	}
 	spin_unlock_bh(&idev->mc_lock);
 
+	spin_lock_bh(&im->mca_lock);
 	if (pmc) {
-		for (psf = pmc->mca_tomb; psf; psf = psf_next) {
-			psf_next = psf->sf_next;
-			kfree(psf);
+		im->idev = pmc->idev;
+		im->mca_crcount = idev->mc_qrv;
+		im->mca_sfmode = pmc->mca_sfmode;
+		if (pmc->mca_sfmode == MCAST_INCLUDE) {
+			im->mca_tomb = pmc->mca_tomb;
+			im->mca_sources = pmc->mca_sources;
+			for (psf = im->mca_sources; psf; psf = psf->sf_next)
+				psf->sf_crcount = im->mca_crcount;
 		}
 		in6_dev_put(pmc->idev);
-		kfree(pmc);
 	}
+	spin_unlock_bh(&im->mca_lock);
 }
 
 static void mld_clear_delrec(struct inet6_dev *idev)
@@ -904,7 +909,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 	mca_get(mc);
 	write_unlock_bh(&idev->lock);
 
-	mld_del_delrec(idev, &mc->mca_addr);
+	mld_del_delrec(idev, mc);
 	igmp6_group_added(mc);
 	ma_put(mc);
 	return 0;
@@ -927,6 +932,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 				write_unlock_bh(&idev->lock);
 
 				igmp6_group_dropped(ma);
+				ip6_mc_clear_src(ma);
 
 				ma_put(ma);
 				return 0;
@@ -2501,15 +2507,17 @@ void ipv6_mc_down(struct inet6_dev *idev)
 	/* Withdraw multicast list */
 
 	read_lock_bh(&idev->lock);
-	mld_ifc_stop_timer(idev);
-	mld_gq_stop_timer(idev);
-	mld_dad_stop_timer(idev);
 
 	for (i = idev->mc_list; i; i = i->next)
 		igmp6_group_dropped(i);
-	read_unlock_bh(&idev->lock);
 
-	mld_clear_delrec(idev);
+	/* Should stop timer after group drop. or we will
+	 * start timer again in mld_ifc_event()
+	 */
+	mld_ifc_stop_timer(idev);
+	mld_gq_stop_timer(idev);
+	mld_dad_stop_timer(idev);
+	read_unlock_bh(&idev->lock);
 }
 
 static void ipv6_mc_reset(struct inet6_dev *idev)
@@ -2531,8 +2539,10 @@ void ipv6_mc_up(struct inet6_dev *idev)
 
 	read_lock_bh(&idev->lock);
 	ipv6_mc_reset(idev);
-	for (i = idev->mc_list; i; i = i->next)
+	for (i = idev->mc_list; i; i = i->next) {
+		mld_del_delrec(idev, i);
 		igmp6_group_added(i);
+	}
 	read_unlock_bh(&idev->lock);
 }
 
@@ -2565,6 +2575,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 
 	/* Deactivate timers */
 	ipv6_mc_down(idev);
+	mld_clear_delrec(idev);
 
 	/* Delete all-nodes address. */
 	/* We cannot call ipv6_dev_mc_dec() directly, our caller in
@@ -2579,11 +2590,9 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	while ((i = idev->mc_list) != NULL) {
 		idev->mc_list = i->next;
+
 		write_unlock_bh(&idev->lock);
-
-		igmp6_group_dropped(i);
 		ma_put(i);
-
 		write_lock_bh(&idev->lock);
 	}
 	write_unlock_bh(&idev->lock);

From 02ca0423fd65a0a9c4d70da0dbb8f4b8503f08c7 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jkbs@redhat.com>
Date: Fri, 13 Jan 2017 10:12:20 +0100
Subject: [PATCH 246/258] ip6_tunnel: Account for tunnel header in tunnel MTU

With ip6gre we have a tunnel header which also makes the tunnel MTU
smaller. We need to reserve room for it. Previously we were using up
space reserved for the Tunnel Encapsulation Limit option
header (RFC 2473).

Also, after commit b05229f44228 ("gre6: Cleanup GREv6 transmit path,
call common GRE functions") our contract with the caller has
changed. Now we check if the packet length exceeds the tunnel MTU after
the tunnel header has been pushed, unlike before.

This is reflected in the check where we look at the packet length minus
the size of the tunnel header, which is already accounted for in tunnel
MTU.

Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions")
Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 36d292180942..753d6d0860fb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1108,7 +1108,7 @@ route_lookup:
 				     t->parms.name);
 		goto tx_err_dst_release;
 	}
-	mtu = dst_mtu(dst) - psh_hlen;
+	mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
 	if (encap_limit >= 0) {
 		max_headroom += 8;
 		mtu -= 8;
@@ -1117,7 +1117,7 @@ route_lookup:
 		mtu = IPV6_MIN_MTU;
 	if (skb_dst(skb) && !t->parms.collect_md)
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-	if (skb->len > mtu && !skb_is_gso(skb)) {
+	if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;
 		goto tx_err_dst_release;

From 34c55cf2fc75f8bf6ba87df321038c064cf2d426 Mon Sep 17 00:00:00 2001
From: "Karicheri, Muralidharan" <m-karicheri2@ti.com>
Date: Fri, 13 Jan 2017 09:32:34 -0500
Subject: [PATCH 247/258] net: phy: dp83867: allow RGMII_TXID/RGMII_RXID
 interface types

Currently dp83867 driver returns error if phy interface type
PHY_INTERFACE_MODE_RGMII_RXID is used to set the rx only internal
delay. Similarly issue happens for PHY_INTERFACE_MODE_RGMII_TXID.
Fix this by checking also the interface type if a particular delay
value is missing in the phy dt bindings. Also update the DT document
accordingly.

Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,dp83867.txt | 6 ++++--
 drivers/net/phy/dp83867.c                            | 8 ++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 85bf945b898f..afe9630a5e7d 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -3,9 +3,11 @@
 Required properties:
 	- reg - The ID number for the phy, usually a small integer
 	- ti,rx-internal-delay - RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h
-		for applicable values
+		for applicable values. Required only if interface type is
+		PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID
 	- ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h
-		for applicable values
+		for applicable values. Required only if interface type is
+		PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID
 	- ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
 		for applicable values
 
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index e84ae084e259..ca1b462bf7b2 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -132,12 +132,16 @@ static int dp83867_of_init(struct phy_device *phydev)
 
 	ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
 				   &dp83867->rx_id_delay);
-	if (ret)
+	if (ret &&
+	    (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	     phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID))
 		return ret;
 
 	ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
 				   &dp83867->tx_id_delay);
-	if (ret)
+	if (ret &&
+	    (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	     phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID))
 		return ret;
 
 	return of_property_read_u32(of_node, "ti,fifo-depth",

From 57d5f64d83ab5b5a5118b1597386dd76eaf4340d Mon Sep 17 00:00:00 2001
From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Date: Fri, 13 Jan 2017 15:46:25 +0100
Subject: [PATCH 248/258] tipc: allocate user memory with GFP_KERNEL flag

Until now, we allocate memory always with GFP_ATOMIC flag.
When the system is under memory pressure and a user tries to send,
the send fails due to low memory. However, the user application
can wait for free memory if we allocate it using GFP_KERNEL flag.

In this commit, we use allocate memory with GFP_KERNEL for all user
allocation.

Reported-by: Rune Torgersen <runet@innovsys.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/discover.c   |  4 ++--
 net/tipc/link.c       |  2 +-
 net/tipc/msg.c        | 16 ++++++++--------
 net/tipc/msg.h        |  2 +-
 net/tipc/name_distr.c |  2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 6b109a808d4c..02462d67d191 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 
 	/* Send response, if necessary */
 	if (respond && (mtyp == DSC_REQ_MSG)) {
-		rskb = tipc_buf_acquire(MAX_H_SIZE);
+		rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
 		if (!rskb)
 			return;
 		tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
@@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
 	req = kmalloc(sizeof(*req), GFP_ATOMIC);
 	if (!req)
 		return -ENOMEM;
-	req->buf = tipc_buf_acquire(MAX_H_SIZE);
+	req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
 	if (!req->buf) {
 		kfree(req);
 		return -ENOMEM;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index bda89bf9f4ff..4e8647aef01c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1395,7 +1395,7 @@ tnl:
 			msg_set_seqno(hdr, seqno++);
 		pktlen = msg_size(hdr);
 		msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
-		tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE);
+		tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
 		if (!tnlskb) {
 			pr_warn("%sunable to send packet\n", link_co_err);
 			return;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index a22be502f1bd..ab02d0742476 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -58,12 +58,12 @@ static unsigned int align(unsigned int i)
  * NOTE: Headroom is reserved to allow prepending of a data link header.
  *       There may also be unrequested tailroom present at the buffer's end.
  */
-struct sk_buff *tipc_buf_acquire(u32 size)
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
 {
 	struct sk_buff *skb;
 	unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
 
-	skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
+	skb = alloc_skb_fclone(buf_size, gfp);
 	if (skb) {
 		skb_reserve(skb, BUF_HEADROOM);
 		skb_put(skb, size);
@@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type,
 	struct tipc_msg *msg;
 	struct sk_buff *buf;
 
-	buf = tipc_buf_acquire(hdr_sz + data_sz);
+	buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC);
 	if (unlikely(!buf))
 		return NULL;
 
@@ -261,7 +261,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 
 	/* No fragmentation needed? */
 	if (likely(msz <= pktmax)) {
-		skb = tipc_buf_acquire(msz);
+		skb = tipc_buf_acquire(msz, GFP_KERNEL);
 		if (unlikely(!skb))
 			return -ENOMEM;
 		skb_orphan(skb);
@@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 	msg_set_importance(&pkthdr, msg_importance(mhdr));
 
 	/* Prepare first fragment */
-	skb = tipc_buf_acquire(pktmax);
+	skb = tipc_buf_acquire(pktmax, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 	skb_orphan(skb);
@@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 			pktsz = drem + INT_H_SIZE;
 		else
 			pktsz = pktmax;
-		skb = tipc_buf_acquire(pktsz);
+		skb = tipc_buf_acquire(pktsz, GFP_KERNEL);
 		if (!skb) {
 			rc = -ENOMEM;
 			goto error;
@@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb,  struct tipc_msg *msg,
 	if (msz > (max / 2))
 		return false;
 
-	_skb = tipc_buf_acquire(max);
+	_skb = tipc_buf_acquire(max, GFP_ATOMIC);
 	if (!_skb)
 		return false;
 
@@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 
 	/* Never return SHORT header; expand by replacing buffer if necessary */
 	if (msg_short(hdr)) {
-		*skb = tipc_buf_acquire(BASIC_H_SIZE + dlen);
+		*skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC);
 		if (!*skb)
 			goto exit;
 		memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 8d408612ffa4..2c3dc38abf9c 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -820,7 +820,7 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
 	return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
 }
 
-struct sk_buff *tipc_buf_acquire(u32 size);
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
 bool tipc_msg_validate(struct sk_buff *skb);
 bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
 void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index c1cfd92de17a..23f8899e0f8c 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -69,7 +69,7 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
 					 u32 dest)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size);
+	struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
 	struct tipc_msg *msg;
 
 	if (buf != NULL) {

From f1f7714ea51c56b7163fb1a5acf39c6a204dd758 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 13 Jan 2017 23:38:15 +0100
Subject: [PATCH 249/258] bpf: rework prog_digest into prog_tag

Commit 7bd509e311f4 ("bpf: add prog_digest and expose it via
fdinfo/netlink") was recently discussed, partially due to
admittedly suboptimal name of "prog_digest" in combination
with sha1 hash usage, thus inevitably and rightfully concerns
about its security in terms of collision resistance were
raised with regards to use-cases.

The intended use cases are for debugging resp. introspection
only for providing a stable "tag" over the instruction sequence
that both kernel and user space can calculate independently.
It's not usable at all for making a security relevant decision.
So collisions where two different instruction sequences generate
the same tag can happen, but ideally at a rather low rate. The
"tag" will be dumped in hex and is short enough to introspect
in tracepoints or kallsyms output along with other data such
as stack trace, etc. Thus, this patch performs a rename into
prog_tag and truncates the tag to a short output (64 bits) to
make it obvious it's not collision-free.

Should in future a hash or facility be needed with a security
relevant focus, then we can think about requirements, constraints,
etc that would fit to that situation. For now, rework the exposed
parts for the current use cases as long as nothing has been
released yet. Tested on x86_64 and s390x.

Fixes: 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h                |  2 +-
 include/linux/filter.h             |  6 ++++--
 include/uapi/linux/pkt_cls.h       |  2 +-
 include/uapi/linux/tc_act/tc_bpf.h |  2 +-
 kernel/bpf/core.c                  | 14 ++++++++------
 kernel/bpf/syscall.c               |  8 ++++----
 kernel/bpf/verifier.c              |  2 +-
 net/sched/act_bpf.c                |  5 ++---
 net/sched/cls_bpf.c                |  4 ++--
 9 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f74ae68086dc..05cf951df3fe 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -216,7 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
-int bpf_prog_calc_digest(struct bpf_prog *fp);
+int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a0934e6c9bab..e4eb2546339a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -57,6 +57,8 @@ struct bpf_prog_aux;
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
+#define BPF_TAG_SIZE	8
+
 /* Helper macros for filter block array initializers. */
 
 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
@@ -408,7 +410,7 @@ struct bpf_prog {
 	kmemcheck_bitfield_end(meta);
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	u32			len;		/* Number of filter blocks */
-	u32			digest[SHA_DIGEST_WORDS]; /* Program digest */
+	u8			tag[BPF_TAG_SIZE];
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	unsigned int		(*bpf_func)(const void *ctx,
@@ -519,7 +521,7 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
 	return prog->len * sizeof(struct bpf_insn);
 }
 
-static inline u32 bpf_prog_digest_scratch_size(const struct bpf_prog *prog)
+static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog)
 {
 	return round_up(bpf_prog_insn_size(prog) +
 			sizeof(__be64) + 1, SHA_MESSAGE_BYTES);
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index cb4bcdc58543..a4dcd88ec271 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -397,7 +397,7 @@ enum {
 	TCA_BPF_NAME,
 	TCA_BPF_FLAGS,
 	TCA_BPF_FLAGS_GEN,
-	TCA_BPF_DIGEST,
+	TCA_BPF_TAG,
 	__TCA_BPF_MAX,
 };
 
diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index a6b88a6f7f71..975b50dc8d1d 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -27,7 +27,7 @@ enum {
 	TCA_ACT_BPF_FD,
 	TCA_ACT_BPF_NAME,
 	TCA_ACT_BPF_PAD,
-	TCA_ACT_BPF_DIGEST,
+	TCA_ACT_BPF_TAG,
 	__TCA_ACT_BPF_MAX,
 };
 #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1eb4f1303756..503d4211988a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -146,10 +146,11 @@ void __bpf_prog_free(struct bpf_prog *fp)
 	vfree(fp);
 }
 
-int bpf_prog_calc_digest(struct bpf_prog *fp)
+int bpf_prog_calc_tag(struct bpf_prog *fp)
 {
 	const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
-	u32 raw_size = bpf_prog_digest_scratch_size(fp);
+	u32 raw_size = bpf_prog_tag_scratch_size(fp);
+	u32 digest[SHA_DIGEST_WORDS];
 	u32 ws[SHA_WORKSPACE_WORDS];
 	u32 i, bsize, psize, blocks;
 	struct bpf_insn *dst;
@@ -162,7 +163,7 @@ int bpf_prog_calc_digest(struct bpf_prog *fp)
 	if (!raw)
 		return -ENOMEM;
 
-	sha_init(fp->digest);
+	sha_init(digest);
 	memset(ws, 0, sizeof(ws));
 
 	/* We need to take out the map fd for the digest calculation
@@ -204,13 +205,14 @@ int bpf_prog_calc_digest(struct bpf_prog *fp)
 	*bits = cpu_to_be64((psize - 1) << 3);
 
 	while (blocks--) {
-		sha_transform(fp->digest, todo, ws);
+		sha_transform(digest, todo, ws);
 		todo += SHA_MESSAGE_BYTES;
 	}
 
-	result = (__force __be32 *)fp->digest;
+	result = (__force __be32 *)digest;
 	for (i = 0; i < SHA_DIGEST_WORDS; i++)
-		result[i] = cpu_to_be32(fp->digest[i]);
+		result[i] = cpu_to_be32(digest[i]);
+	memcpy(fp->tag, result, sizeof(fp->tag));
 
 	vfree(raw);
 	return 0;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e89acea22ecf..1d6b29e4e2c3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -688,17 +688,17 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_prog *prog = filp->private_data;
-	char prog_digest[sizeof(prog->digest) * 2 + 1] = { };
+	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
 
-	bin2hex(prog_digest, prog->digest, sizeof(prog->digest));
+	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
 		   "prog_type:\t%u\n"
 		   "prog_jited:\t%u\n"
-		   "prog_digest:\t%s\n"
+		   "prog_tag:\t%s\n"
 		   "memlock:\t%llu\n",
 		   prog->type,
 		   prog->jited,
-		   prog_digest,
+		   prog_tag,
 		   prog->pages * 1ULL << PAGE_SHIFT);
 }
 #endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 83ed2f8f6f22..cdc43b899f28 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2936,7 +2936,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 	int insn_cnt = env->prog->len;
 	int i, j, err;
 
-	err = bpf_prog_calc_digest(env->prog);
+	err = bpf_prog_calc_tag(env->prog);
 	if (err)
 		return err;
 
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1c60317f0121..520baa41cba3 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -123,12 +123,11 @@ static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
 	    nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
 		return -EMSGSIZE;
 
-	nla = nla_reserve(skb, TCA_ACT_BPF_DIGEST,
-			  sizeof(prog->filter->digest));
+	nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag));
 	if (nla == NULL)
 		return -EMSGSIZE;
 
-	memcpy(nla_data(nla), prog->filter->digest, nla_len(nla));
+	memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
 
 	return 0;
 }
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index adc776048d1a..d9c97018317d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -555,11 +555,11 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
 	    nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
 		return -EMSGSIZE;
 
-	nla = nla_reserve(skb, TCA_BPF_DIGEST, sizeof(prog->filter->digest));
+	nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag));
 	if (nla == NULL)
 		return -EMSGSIZE;
 
-	memcpy(nla_data(nla), prog->filter->digest, nla_len(nla));
+	memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
 
 	return 0;
 }

From 8a367e74c0120ef68c8c70d5a025648c96626dff Mon Sep 17 00:00:00 2001
From: Basil Gunn <basil@pacabunga.com>
Date: Sat, 14 Jan 2017 12:18:55 -0800
Subject: [PATCH 250/258] ax25: Fix segfault after sock connection timeout

The ax.25 socket connection timed out & the sock struct has been
previously taken down ie. sock struct is now a NULL pointer. Checking
the sock_flag causes the segfault.  Check if the socket struct pointer
is NULL before checking sock_flag. This segfault is seen in
timed out netrom connections.

Please submit to -stable.

Signed-off-by: Basil Gunn <basil@pacabunga.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_subr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 4855d18a8511..038b109b2be7 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -264,7 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
 {
 	ax25_clear_queues(ax25);
 
-	if (!sock_flag(ax25->sk, SOCK_DESTROY))
+	if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
 		ax25_stop_heartbeat(ax25);
 	ax25_stop_t1timer(ax25);
 	ax25_stop_t2timer(ax25);

From abeffce90c7f6ce74de9794ad0977a168edf8ef6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 15 Jan 2017 19:50:46 +0200
Subject: [PATCH 251/258] net/mlx5e: Fix a -Wmaybe-uninitialized warning

As found by Olof's build bot, we gain a harmless warning about a
potential uninitialized variable reference in mlx5:

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c: In function 'parse_tc_fdb_actions':
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:769:13: warning: 'out_dev' may be used uninitialized in this function [-Wmaybe-uninitialized]
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:811:21: note: 'out_dev' was declared here

This was introduced through the addition of an 'IS_ERR/PTR_ERR' pair
that gcc is unfortunately unable to completely figure out.

The problem being gcc cannot tell that if(IS_ERR()) in
mlx5e_route_lookup_ipv4() is equivalent to checking if(err) later,
so it assumes that 'out_dev' is used after the 'return PTR_ERR(rt)'.

The PTR_ERR_OR_ZERO() case by comparison is fairly easy to detect
by gcc, so it can't get that wrong, so it no longer warns.

Hadar Hen Zion already attempted to fix the warning earlier by adding fake
initializations, but that ended up not fully addressing all warnings, so
I'm reverting it now that it is no longer needed.

Link: http://arm-soc.lixom.net/buildlogs/mainline/v4.10-rc3-98-gcff3b2c/
Fixes: a42485eb0ee4 ("net/mlx5e: TC ipv4 tunnel encap offload error flow fixes")
Fixes: a757d108dc1a ("net/mlx5e: Fix kbuild warnings for uninitialized parameters")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 118cea5e5489..46bef6a26a8c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -668,9 +668,12 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 	int ttl;
 
 #if IS_ENABLED(CONFIG_INET)
+	int ret;
+
 	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
-	if (IS_ERR(rt))
-		return PTR_ERR(rt);
+	ret = PTR_ERR_OR_ZERO(rt);
+	if (ret)
+		return ret;
 #else
 	return -EOPNOTSUPP;
 #endif
@@ -741,8 +744,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	struct flowi4 fl4 = {};
 	char *encap_header;
 	int encap_size;
-	__be32 saddr = 0;
-	int ttl = 0;
+	__be32 saddr;
+	int ttl;
 	int err;
 
 	encap_header = kzalloc(max_encap_size, GFP_KERNEL);

From b618ab4561d40664492cf9f9507f19a1c8272970 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 15 Jan 2017 19:19:00 +0100
Subject: [PATCH 252/258] net: stmmac: don't use netdev_[dbg, info, ..] before
 net_device is registered

Don't use netdev_info and friends before the net_device is registered.
This avoids ugly messages like
"meson8b-dwmac c9410000.ethernet (unnamed net_device) (uninitialized):
Enable RX Mitigation via HW Watchdog Timer"

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a276a32d57f2..e3f6389e1b01 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3326,9 +3326,9 @@ int stmmac_dvr_probe(struct device *device,
 	    (priv->plat->maxmtu >= ndev->min_mtu))
 		ndev->max_mtu = priv->plat->maxmtu;
 	else if (priv->plat->maxmtu < ndev->min_mtu)
-		netdev_warn(priv->dev,
-			    "%s: warning: maxmtu having invalid value (%d)\n",
-			    __func__, priv->plat->maxmtu);
+		dev_warn(priv->device,
+			 "%s: warning: maxmtu having invalid value (%d)\n",
+			 __func__, priv->plat->maxmtu);
 
 	if (flow_ctrl)
 		priv->flow_ctrl = FLOW_AUTO;	/* RX/TX pause on */
@@ -3340,7 +3340,8 @@ int stmmac_dvr_probe(struct device *device,
 	 */
 	if ((priv->synopsys_id >= DWMAC_CORE_3_50) && (!priv->plat->riwt_off)) {
 		priv->use_riwt = 1;
-		netdev_info(priv->dev, "Enable RX Mitigation via HW Watchdog Timer\n");
+		dev_info(priv->device,
+			 "Enable RX Mitigation via HW Watchdog Timer\n");
 	}
 
 	netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
@@ -3366,17 +3367,17 @@ int stmmac_dvr_probe(struct device *device,
 		/* MDIO bus Registration */
 		ret = stmmac_mdio_register(ndev);
 		if (ret < 0) {
-			netdev_err(priv->dev,
-				   "%s: MDIO bus (id: %d) registration failed",
-				   __func__, priv->plat->bus_id);
+			dev_err(priv->device,
+				"%s: MDIO bus (id: %d) registration failed",
+				__func__, priv->plat->bus_id);
 			goto error_mdio_register;
 		}
 	}
 
 	ret = register_netdev(ndev);
 	if (ret) {
-		netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
-			   __func__, ret);
+		dev_err(priv->device, "%s: ERROR %i registering the device\n",
+			__func__, ret);
 		goto error_netdev_register;
 	}
 

From 291c566a28910614ce42d0ffe82196eddd6346f4 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 16 Jan 2017 18:31:37 +0200
Subject: [PATCH 253/258] net/mlx4_core: Fix racy CQ (Completion Queue) free

In function mlx4_cq_completion() and mlx4_cq_event(), the
radix_tree_lookup requires a rcu_read_lock.
This is mandatory: if another core frees the CQ, it could
run the radix_tree_node_rcu_free() call_rcu() callback while
its being used by the radix tree lookup function.

Additionally, in function mlx4_cq_event(), since we are adding
the rcu lock around the radix-tree lookup, we no longer need to take
the spinlock. Also, the synchronize_irq() call for the async event
eliminates the need for incrementing the cq reference count in
mlx4_cq_event().

Other changes:
1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock:
   we no longer take this spinlock in the interrupt context.
   The spinlock here, therefore, simply protects against different
   threads simultaneously invoking mlx4_cq_free() for different cq's.

2. In function mlx4_cq_free(), we move the radix tree delete to before
   the synchronize_irq() calls. This guarantees that we will not
   access this cq during any subsequent interrupts, and therefore can
   safely free the CQ after the synchronize_irq calls. The rcu_read_lock
   in the interrupt handlers only needs to protect against corrupting the
   radix tree; the interrupt handlers may access the cq outside the
   rcu_read_lock due to the synchronize_irq calls which protect against
   premature freeing of the cq.

3. In function mlx4_cq_event(), we change the mlx_warn message to mlx4_dbg.

4. We leave the cq reference count mechanism in place, because it is
   still needed for the cq completion tasklet mechanism.

Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ")
Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cq.c | 38 +++++++++++++------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index a849da92f857..6b8635378f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
 {
 	struct mlx4_cq *cq;
 
+	rcu_read_lock();
 	cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
 			       cqn & (dev->caps.num_cqs - 1));
+	rcu_read_unlock();
+
 	if (!cq) {
 		mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
 		return;
 	}
 
+	/* Acessing the CQ outside of rcu_read_lock is safe, because
+	 * the CQ is freed only after interrupt handling is completed.
+	 */
 	++cq->arm_sn;
 
 	cq->comp(cq);
@@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
 	struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
 	struct mlx4_cq *cq;
 
-	spin_lock(&cq_table->lock);
-
+	rcu_read_lock();
 	cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
-	if (cq)
-		atomic_inc(&cq->refcount);
-
-	spin_unlock(&cq_table->lock);
+	rcu_read_unlock();
 
 	if (!cq) {
-		mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+		mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn);
 		return;
 	}
 
+	/* Acessing the CQ outside of rcu_read_lock is safe, because
+	 * the CQ is freed only after interrupt handling is completed.
+	 */
 	cq->event(cq, event_type);
-
-	if (atomic_dec_and_test(&cq->refcount))
-		complete(&cq->free);
 }
 
 static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	if (err)
 		return err;
 
-	spin_lock_irq(&cq_table->lock);
+	spin_lock(&cq_table->lock);
 	err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
-	spin_unlock_irq(&cq_table->lock);
+	spin_unlock(&cq_table->lock);
 	if (err)
 		goto err_icm;
 
@@ -349,9 +351,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	return 0;
 
 err_radix:
-	spin_lock_irq(&cq_table->lock);
+	spin_lock(&cq_table->lock);
 	radix_tree_delete(&cq_table->tree, cq->cqn);
-	spin_unlock_irq(&cq_table->lock);
+	spin_unlock(&cq_table->lock);
 
 err_icm:
 	mlx4_cq_free_icm(dev, cq->cqn);
@@ -370,15 +372,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 	if (err)
 		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
+	spin_lock(&cq_table->lock);
+	radix_tree_delete(&cq_table->tree, cq->cqn);
+	spin_unlock(&cq_table->lock);
+
 	synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
 	if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
 	    priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
 		synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
 
-	spin_lock_irq(&cq_table->lock);
-	radix_tree_delete(&cq_table->tree, cq->cqn);
-	spin_unlock_irq(&cq_table->lock);
-
 	if (atomic_dec_and_test(&cq->refcount))
 		complete(&cq->free);
 	wait_for_completion(&cq->free);

From 7c3945bc2073554bb2ecf983e073dee686679c53 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 16 Jan 2017 18:31:38 +0200
Subject: [PATCH 254/258] net/mlx4_core: Fix when to save some qp context flags
 for dynamic VST to VGT transitions

Save the qp context flags byte containing the flag disabling vlan stripping
in the RESET to INIT qp transition, rather than in the INIT to RTR
transition. Per the firmware spec, the flags in this byte are active
in the RESET to INIT transition.

As a result of saving the flags in the incorrect qp transition, when
switching dynamically from VGT to VST and back to VGT, the vlan
remained stripped (as is required for VST) and did not return to
not-stripped (as is required for VGT).

Fixes: f0f829bf42cd ("net/mlx4_core: Add immediate activate for VGT->VST->VGT")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 56185a0b827d..1822382212ee 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2980,6 +2980,9 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 		put_res(dev, slave, srqn, RES_SRQ);
 		qp->srq = srq;
 	}
+
+	/* Save param3 for dynamic changes from VST back to VGT */
+	qp->param3 = qpc->param3;
 	put_res(dev, slave, rcqn, RES_CQ);
 	put_res(dev, slave, mtt_base, RES_MTT);
 	res_end_move(dev, slave, RES_QP, qpn);
@@ -3772,7 +3775,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
 	int qpn = vhcr->in_modifier & 0x7fffff;
 	struct res_qp *qp;
 	u8 orig_sched_queue;
-	__be32	orig_param3 = qpc->param3;
 	u8 orig_vlan_control = qpc->pri_path.vlan_control;
 	u8 orig_fvl_rx = qpc->pri_path.fvl_rx;
 	u8 orig_pri_path_fl = qpc->pri_path.fl;
@@ -3814,7 +3816,6 @@ out:
 	 */
 	if (!err) {
 		qp->sched_queue = orig_sched_queue;
-		qp->param3	= orig_param3;
 		qp->vlan_control = orig_vlan_control;
 		qp->fvl_rx	=  orig_fvl_rx;
 		qp->pri_path_fl = orig_pri_path_fl;

From 9577b174cd0323d287c994ef0891db71666d0765 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 16 Jan 2017 18:31:39 +0200
Subject: [PATCH 255/258] net/mlx4_core: Eliminate warning messages for
 SRQ_LIMIT under SRIOV

When running SRIOV, warnings for SRQ LIMIT events flood the Hypervisor's
message log when (correct, normally operating) apps use SRQ LIMIT events
as a trigger to post WQEs to SRQs.

Add more information to the existing debug printout for SRQ_LIMIT, and
output the warning messages only for the SRQ CATAS ERROR event.

Fixes: acba2420f9d2 ("mlx4_core: Add wrapper functions and comm channel and slave event support to EQs")
Fixes: e0debf9cb50d ("mlx4_core: Reduce warning message for SRQ_LIMIT event to debug level")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/eq.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index cd3638e6fe25..0509996957d9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -554,8 +554,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 			break;
 
 		case MLX4_EVENT_TYPE_SRQ_LIMIT:
-			mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n",
-				 __func__);
+			mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n",
+				 __func__, be32_to_cpu(eqe->event.srq.srqn),
+				 eq->eqn);
 		case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
 			if (mlx4_is_master(dev)) {
 				/* forward only to slave owning the SRQ */
@@ -570,15 +571,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 						  eq->eqn, eq->cons_index, ret);
 					break;
 				}
-				mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n",
-					  __func__, slave,
-					  be32_to_cpu(eqe->event.srq.srqn),
-					  eqe->type, eqe->subtype);
+				if (eqe->type ==
+				    MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)
+					mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n",
+						  __func__, slave,
+						  be32_to_cpu(eqe->event.srq.srqn),
+						  eqe->type, eqe->subtype);
 
 				if (!ret && slave != dev->caps.function) {
-					mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n",
-						  __func__, eqe->type,
-						  eqe->subtype, slave);
+					if (eqe->type ==
+					    MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)
+						mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n",
+							  __func__, eqe->type,
+							  eqe->subtype, slave);
 					mlx4_slave_event(dev, slave, eqe);
 					break;
 				}

From 0faa9cb5b3836a979864a6357e01d2046884ad52 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sun, 15 Jan 2017 10:14:06 -0500
Subject: [PATCH 256/258] net sched actions: fix refcnt when GETing of action
 after bind

Demonstrating the issue:

.. add a drop action
$sudo $TC actions add action drop index 10

.. retrieve it
$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 2 bind 0 installed 29 sec used 29 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

... bug 1 above: reference is two.
    Reference is actually 1 but we forget to subtract 1.

... do a GET again and we see the same issue
    try a few times and nothing changes
~$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 2 bind 0 installed 31 sec used 31 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

... lets try to bind the action to a filter..
$ sudo $TC qdisc add dev lo ingress
$ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \
  u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10

... and now a few GETs:
$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 3 bind 1 installed 204 sec used 204 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 4 bind 1 installed 206 sec used 206 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 5 bind 1 installed 235 sec used 235 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

.... as can be observed the reference count keeps going up.

After the fix

$ sudo $TC actions add action drop index 10
$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 1 bind 0 installed 4 sec used 4 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 1 bind 0 installed 6 sec used 6 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

$ sudo $TC qdisc add dev lo ingress
$ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \
  u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10

$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 2 bind 1 installed 32 sec used 32 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

$ sudo $TC -s actions get action gact index 10

	action order 1: gact action drop
	 random type none pass val 0
	 index 10 ref 2 bind 1 installed 33 sec used 33 sec
 	Action statistics:
	Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

Fixes: aecc5cefc389 ("net sched actions: fix GETing actions")
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2095c83ce773..e10456ef6f7a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -900,8 +900,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 			goto err;
 		}
 		act->order = i;
-		if (event == RTM_GETACTION)
-			act->tcfa_refcnt++;
 		list_add_tail(&act->list, &actions);
 	}
 
@@ -914,7 +912,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		return ret;
 	}
 err:
-	tcf_action_destroy(&actions, 0);
+	if (event != RTM_GETACTION)
+		tcf_action_destroy(&actions, 0);
 	return ret;
 }
 

From 501db511397fd6efff3aa5b4e8de415b55559550 Mon Sep 17 00:00:00 2001
From: Rolf Neugebauer <rolf.neugebauer@docker.com>
Date: Tue, 17 Jan 2017 18:13:51 +0000
Subject: [PATCH 257/258] virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit

This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a
subtle change in how the virtio_net flags are derived from the SKBs
ip_summed field.

With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID
when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to
ip_summed == CHECKSUM_NONE, which should be the same.

Further, the virtio spec 1.0 / CS04 explicitly says that
VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver.

Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb")
Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 66204007d7ac..56436472ccc7 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 				skb_checksum_start_offset(skb));
 		hdr->csum_offset = __cpu_to_virtio16(little_endian,
 				skb->csum_offset);
-	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-		hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
 	} /* else everything is zero */
 
 	return 0;

From a249708bc2aa1fe3ddf15dfac22bee519d15996b Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Tue, 17 Jan 2017 12:23:21 +0100
Subject: [PATCH 258/258] stmmac: add missing of_node_put

The function stmmac_dt_phy provides several possibilities for initializing
plat->mdio_node, all of which have the effect of increasing the reference
count of the assigned value.  This field is not updated elsewhere, so the
value is live until the end of the lifetime of plat (devm_allocated), just
after the end of stmmac_remove_config_dt.  Thus, add an of_node_put on
plat->mdio_node in stmmac_remove_config_dt.  It is possible that the field
mdio_node is never initialized, but of_node_put is NULL-safe, so it is also
safe to call of_node_put in that case.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Acked-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 082cd48db6a7..36942f5a6a53 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -351,6 +351,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev,
 	if (of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
 	of_node_put(plat->phy_node);
+	of_node_put(plat->mdio_node);
 }
 #else
 struct plat_stmmacenet_data *