From 6fce983f9b3ef51d47e647b2cff15049ef803781 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Tue, 13 Dec 2016 11:03:49 +0000
Subject: [PATCH 001/297] ASoC: dwc: Fix PIO mode initialization

We can no longer rely on the return value of
devm_snd_dmaengine_pcm_register(...) to check if the DMA
handle is declared in the DT.

Previously this check activated PIO mode but currently
dma_request_chan returns either a valid channel or -EPROBE_DEFER.

In order to activate PIO mode check instead if the interrupt
line is declared. This reflects better what is documented in
the DT bindings (see Documentation/devicetree/bindings/sound/
designware-i2s.txt).

Also, initialize use_pio variable which was never being set
causing PIO mode to never work.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/dwc/designware_i2s.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c
index 2998954a1c74..bdf8398cbc81 100644
--- a/sound/soc/dwc/designware_i2s.c
+++ b/sound/soc/dwc/designware_i2s.c
@@ -681,22 +681,19 @@ static int dw_i2s_probe(struct platform_device *pdev)
 	}
 
 	if (!pdata) {
-		ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
-		if (ret == -EPROBE_DEFER) {
-			dev_err(&pdev->dev,
-				"failed to register PCM, deferring probe\n");
-			return ret;
-		} else if (ret) {
-			dev_err(&pdev->dev,
-				"Could not register DMA PCM: %d\n"
-				"falling back to PIO mode\n", ret);
+		if (irq >= 0) {
 			ret = dw_pcm_register(pdev);
-			if (ret) {
-				dev_err(&pdev->dev,
-					"Could not register PIO PCM: %d\n",
+			dev->use_pio = true;
+		} else {
+			ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL,
+					0);
+			dev->use_pio = false;
+		}
+
+		if (ret) {
+			dev_err(&pdev->dev, "could not register pcm: %d\n",
 					ret);
-				goto err_clk_disable;
-			}
+			goto err_clk_disable;
 		}
 	}
 

From 8010d7feb2f0367ae573ad601b2905e29db50cd3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 11 Dec 2016 20:09:23 +0100
Subject: [PATCH 002/297] netfilter: nft_quota: reset quota after dump

Dumping of netlink attributes may fail due to insufficient room in the
skbuff, so let's reset consumed quota if we succeed to put netlink
attributes into the skbuff.

Fixes: 43da04a593d8 ("netfilter: nf_tables: atomic dump and reset for stateful objects")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_quota.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index bd6efc53f26d..2d6fe3559912 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -110,30 +110,32 @@ static int nft_quota_obj_init(const struct nlattr * const tb[],
 static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
 			     bool reset)
 {
+	u64 consumed, consumed_cap;
 	u32 flags = priv->flags;
-	u64 consumed;
-
-	if (reset) {
-		consumed = atomic64_xchg(&priv->consumed, 0);
-		if (test_and_clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
-			flags |= NFT_QUOTA_F_DEPLETED;
-	} else {
-		consumed = atomic64_read(&priv->consumed);
-	}
 
 	/* Since we inconditionally increment consumed quota for each packet
 	 * that we see, don't go over the quota boundary in what we send to
 	 * userspace.
 	 */
-	if (consumed > priv->quota)
-		consumed = priv->quota;
+	consumed = atomic64_read(&priv->consumed);
+	if (consumed >= priv->quota) {
+		consumed_cap = priv->quota;
+		flags |= NFT_QUOTA_F_DEPLETED;
+	} else {
+		consumed_cap = consumed;
+	}
 
 	if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
 			 NFTA_QUOTA_PAD) ||
-	    nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed),
+	    nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed_cap),
 			 NFTA_QUOTA_PAD) ||
 	    nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
 		goto nla_put_failure;
+
+	if (reset) {
+		atomic64_sub(consumed, &priv->consumed);
+		clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags);
+	}
 	return 0;
 
 nla_put_failure:

From c2e756ff9e699865d294cdc112acfc36419cf5cc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 11 Dec 2016 20:46:51 +0100
Subject: [PATCH 003/297] netfilter: nft_queue: use raw_smp_processor_id()

Using smp_processor_id() causes splats with PREEMPT_RCU:

[19379.552780] BUG: using smp_processor_id() in preemptible [00000000] code: ping/32389
[19379.552793] caller is debug_smp_processor_id+0x17/0x19
[...]
[19379.552823] Call Trace:
[19379.552832]  [<ffffffff81274e9e>] dump_stack+0x67/0x90
[19379.552837]  [<ffffffff8129a4d4>] check_preemption_disabled+0xe5/0xf5
[19379.552842]  [<ffffffff8129a4fb>] debug_smp_processor_id+0x17/0x19
[19379.552849]  [<ffffffffa07c42dd>] nft_queue_eval+0x35/0x20c [nft_queue]

No need to disable preemption since we only fetch the numeric value, so
let's use raw_smp_processor_id() instead.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 3e19fa1230dc..dbb6aaff67ec 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -38,7 +38,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
 
 	if (priv->queues_total > 1) {
 		if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) {
-			int cpu = smp_processor_id();
+			int cpu = raw_smp_processor_id();
 
 			queue = priv->queuenum + cpu % priv->queues_total;
 		} else {

From 3e38df136e453aa69eb4472108ebce2fb00b1ba6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 13 Dec 2016 13:59:33 +0100
Subject: [PATCH 004/297] netfilter: nf_tables: fix oob access

BUG: KASAN: slab-out-of-bounds in nf_tables_rule_destroy+0xf1/0x130 at addr ffff88006a4c35c8
Read of size 8 by task nft/1607

When we've destroyed last valid expr, nft_expr_next() returns an invalid expr.
We must not dereference it unless it passes != nft_expr_last() check.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a019a87e58ee..0db5f9782265 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2115,7 +2115,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 	 * is called on error from nf_tables_newrule().
 	 */
 	expr = nft_expr_first(rule);
-	while (expr->ops && expr != nft_expr_last(rule)) {
+	while (expr != nft_expr_last(rule) && expr->ops) {
 		nf_tables_expr_destroy(ctx, expr);
 		expr = nft_expr_next(expr);
 	}

From 053d20f5712529016eae10356e0dea9b360325bd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Dec 2016 12:15:49 +0100
Subject: [PATCH 005/297] netfilter: nft_payload: mangle ckecksum if
 NFT_PAYLOAD_L4CSUM_PSEUDOHDR is set

If the NFT_PAYLOAD_L4CSUM_PSEUDOHDR flag is set, then mangle layer 4
checksum. This should not depend on csum_type NFT_PAYLOAD_CSUM_INET
since IPv6 header has no checksum field, but still an update of any of
the pseudoheader fields may trigger a layer 4 checksum update.

Fixes: 1814096980bb ("netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_payload.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 36d2b1096546..7d699bbd45b0 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -250,6 +250,22 @@ static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
 	return 0;
 }
 
+static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
+				 __wsum fsum, __wsum tsum, int csum_offset)
+{
+	__sum16 sum;
+
+	if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+		return -1;
+
+	nft_csum_replace(&sum, fsum, tsum);
+	if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+	    skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+		return -1;
+
+	return 0;
+}
+
 static void nft_payload_set_eval(const struct nft_expr *expr,
 				 struct nft_regs *regs,
 				 const struct nft_pktinfo *pkt)
@@ -259,7 +275,6 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
 	const u32 *src = &regs->data[priv->sreg];
 	int offset, csum_offset;
 	__wsum fsum, tsum;
-	__sum16 sum;
 
 	switch (priv->base) {
 	case NFT_PAYLOAD_LL_HEADER:
@@ -282,18 +297,14 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
 	csum_offset = offset + priv->csum_offset;
 	offset += priv->offset;
 
-	if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+	if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) &&
 	    (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
 	     skb->ip_summed != CHECKSUM_PARTIAL)) {
-		if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
-			goto err;
-
 		fsum = skb_checksum(skb, offset, priv->len, 0);
 		tsum = csum_partial(src, priv->len, 0);
-		nft_csum_replace(&sum, fsum, tsum);
 
-		if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
-		    skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+		if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+		    nft_payload_csum_inet(skb, src, fsum, tsum, csum_offset))
 			goto err;
 
 		if (priv->csum_flags &&

From 0ea617a298dcdc2251b4e10f83ac3f3e627b66e3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 8 Dec 2016 13:05:43 +0000
Subject: [PATCH 006/297] ASoC: rsnd: don't double free kctrl

On an error, snd_ctl_add already free's kctrl, so calling snd_ctl_free_one
to free it again leads to a double free error.  Fix this by removing
the extraneous snd_ctl_free_one call.

Issue found using static analysis with CoverityScan, CID 1372908

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sh/rcar/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index 4bd68de76130..99b5b0835c1e 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -1030,10 +1030,8 @@ static int __rsnd_kctrl_new(struct rsnd_mod *mod,
 		return -ENOMEM;
 
 	ret = snd_ctl_add(card, kctrl);
-	if (ret < 0) {
-		snd_ctl_free_one(kctrl);
+	if (ret < 0)
 		return ret;
-	}
 
 	cfg->update = update;
 	cfg->card = card;

From c2b36129ce53a22b89dd2b88db33e7ffdefe0f41 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Dec 2016 14:17:47 +0000
Subject: [PATCH 007/297] ASoC: topology: kfree kcontrol->private_value before
 freeing kcontrol

kcontrol->private_value is being kfree'd after kcontrol has been freed
(in previous call to snd_ctl_remove).  Instead, fix this by kfreeing
the private_value before kcontrol.

CoverityScan CID#1388311 "Read from pointer after free"

Fixes: eea3dd4f1247a ("ASoC: topology: Only free TLV for volume mixers of a widget")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-topology.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 65670b2b408c..fbfb1fab88d5 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -514,13 +514,12 @@ static void remove_widget(struct snd_soc_component *comp,
 			    == SND_SOC_TPLG_TYPE_MIXER)
 				kfree(kcontrol->tlv.p);
 
-			snd_ctl_remove(card, kcontrol);
-
 			/* Private value is used as struct soc_mixer_control
 			 * for volume mixers or soc_bytes_ext for bytes
 			 * controls.
 			 */
 			kfree((void *)kcontrol->private_value);
+			snd_ctl_remove(card, kcontrol);
 		}
 		kfree(w->kcontrol_news);
 	}

From 9e4d59ada4d602e78eee9fb5f898ce61fdddb446 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Fri, 16 Dec 2016 18:26:54 +0900
Subject: [PATCH 008/297] ASoC: hdmi-codec: use unsigned type to structure
 members with bit-field

This is a fix for Linux 4.10-rc1.

In C language specification, a bit-field is interpreted as a signed or
unsigned integer type consisting of the specified number of bits.

In GCC manual, the range of a signed bit field of N bits is from
-(2^N) / 2 to ((2^N) / 2) - 1
https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Bit-Fields

Therefore, when defined as 1 bit-field with signed type, variables can
represents -1 and 0.

The snd-soc-hdmi-codec module includes a structure which has signed type
members with bit-fields. Codes of this module assign 0 and 1 to the
members. This seems to result in implementation-dependent behaviours.

As of v4.10-rc1 merge window, outside of sound subsystem, this structure
is referred by below GPU modules.
 - tda998x
 - sti-drm
 - mediatek-drm-hdmi
 - msm

As long as I review their codes relevant to the structure, the structure
members are used just for condition statements and printk formats.
My proposal of change is a bit intrusive to the printk formats but this
may be acceptable.

Totally, it's reasonable to use unsigned type for the structure members.
This bug is detected by Sparse, static code analyzer with below warnings.

./include/sound/hdmi-codec.h:39:26: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:40:28: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:41:29: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:42:31: error: dubious one-bit signed bitfield

Fixes: 09184118a8ab ("ASoC: hdmi-codec: Add hdmi-codec for external HDMI-encoders")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Acked-by: Arnaud Pouliquen <arnaud.pouliquen@st.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
CC: stable@vger.kernel.org
---
 include/sound/hdmi-codec.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h
index 530c57bdefa0..915c4357945c 100644
--- a/include/sound/hdmi-codec.h
+++ b/include/sound/hdmi-codec.h
@@ -36,10 +36,10 @@ struct hdmi_codec_daifmt {
 		HDMI_AC97,
 		HDMI_SPDIF,
 	} fmt;
-	int bit_clk_inv:1;
-	int frame_clk_inv:1;
-	int bit_clk_master:1;
-	int frame_clk_master:1;
+	unsigned int bit_clk_inv:1;
+	unsigned int frame_clk_inv:1;
+	unsigned int bit_clk_master:1;
+	unsigned int frame_clk_master:1;
 };
 
 /*

From 65dadffddbe44a60f8be9e95f264949ba1e547e9 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Fri, 16 Dec 2016 14:43:39 -0800
Subject: [PATCH 009/297] Input: joydev - remove unused linux/miscdevice.h
 include

This patch remove the inclusion of linux/miscdevice.h for joydev
since it does not use miscdevice.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joydev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index f3135ae22df4..abd18f31b24f 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -22,7 +22,6 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/poll.h>
 #include <linux/init.h>

From 41c567a5d7d1a986763e58c3394782813c3bcb03 Mon Sep 17 00:00:00 2001
From: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Date: Sun, 18 Dec 2016 15:26:12 -0800
Subject: [PATCH 010/297] Input: i8042 - add Pegatron touchpad to noloop table

Avoid AUX loopback in Pegatron C15B touchpad, so input subsystem is able
to recognize a Synaptics touchpad in the AUX port.

Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=93791
(Touchpad is not detected on DNS 0801480 notebook (PEGATRON C15B))

Suggested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-x86ia64io.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 73a4e68448fc..381d802fe853 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -211,6 +211,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"),
 		},
 	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "C15B"),
+		},
+	},
 	{ }
 };
 

From 67626c9323027534496d21cf32793121662d03c0 Mon Sep 17 00:00:00 2001
From: Aniroop Mathur <a.mathur@samsung.com>
Date: Sun, 18 Dec 2016 15:27:16 -0800
Subject: [PATCH 011/297] Input: synaptics_i2c - change msleep to usleep_range
 for small msecs

msleep(1~20) may not do what the caller intends, and will often sleep longer.
(~20 ms actual sleep for any value given in the 1~20ms range)
This is not the desired behaviour for many cases like device resume time,
device suspend time, device enable time, retry logic, etc.
Thus, change msleep to usleep_range for precise wakeups.

Signed-off-by: Aniroop Mathur <a.mathur@samsung.com>
Acked-by: Igor Grinberg <grinberg@compulab.co.il>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/synaptics_i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index aa7c5da60800..cb2bf203f4ca 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -29,7 +29,7 @@
  * after soft reset, we should wait for 1 ms
  * before the device becomes operational
  */
-#define SOFT_RESET_DELAY_MS	3
+#define SOFT_RESET_DELAY_US	3000
 /* and after hard reset, we should wait for max 500ms */
 #define HARD_RESET_DELAY_MS	500
 
@@ -311,7 +311,7 @@ static int synaptics_i2c_reset_config(struct i2c_client *client)
 	if (ret) {
 		dev_err(&client->dev, "Unable to reset device\n");
 	} else {
-		msleep(SOFT_RESET_DELAY_MS);
+		usleep_range(SOFT_RESET_DELAY_US, SOFT_RESET_DELAY_US + 100);
 		ret = synaptics_i2c_config(client);
 		if (ret)
 			dev_err(&client->dev, "Unable to config device\n");

From 4a8b3a682be9addff7dbd16371fa8c34103b5c31 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 16 Dec 2016 10:55:49 -0600
Subject: [PATCH 012/297] ASoC: Intel: bytcr_rt5640: fallback mechanism if MCLK
 is not enabled

Commit df1a2776a795 ("ASoC: Intel: bytcr_rt5640: add MCLK support")
was merged but the corresponding clock framework patches have not,
after being bumped from audio to clock to x86 domains. The missing
clock-related patches result in a regression starting with 4.9 with
the audio card not being created.

Rather than reverting this commit and all following updates already
queued up for 4.10, handle run-time dependency on MCLK and fall back
to the previous bit-clock mode. This provides the same functionality
as in 4.8 for Baytrail devices. On Baytrail-CR most devices remain
silent with this fallback but additional patches are needed anyway.
As suggested by Mark Brown, the fallback is only allowed with -ENOENT,
all other run-time errors, including -EPROBE_DEFER, will stop the probe
with no sound card registered.

This patch should be applied to -stable as well as ASoC 4.10 fixes

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 507a86a5eafe..e33e4777a65c 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -825,10 +825,20 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
 	if ((byt_rt5640_quirk & BYT_RT5640_MCLK_EN) && (is_valleyview())) {
 		priv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
 		if (IS_ERR(priv->mclk)) {
+			ret_val = PTR_ERR(priv->mclk);
+
 			dev_err(&pdev->dev,
-				"Failed to get MCLK from pmc_plt_clk_3: %ld\n",
-				PTR_ERR(priv->mclk));
-			return PTR_ERR(priv->mclk);
+				"Failed to get MCLK from pmc_plt_clk_3: %d\n",
+				ret_val);
+
+			/*
+			 * Fall back to bit clock usage for -ENOENT (clock not
+			 * available likely due to missing dependencies), bail
+			 * for all other errors, including -EPROBE_DEFER
+			 */
+			if (ret_val != -ENOENT)
+				return ret_val;
+			byt_rt5640_quirk &= ~BYT_RT5640_MCLK_EN;
 		}
 	}
 

From 73ba39ab9307340dc98ec3622891314bbc09cc2e Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 4 Dec 2016 12:51:53 +0800
Subject: [PATCH 013/297] btrfs: return the actual error value from  from
 btrfs_uuid_tree_iterate

In function btrfs_uuid_tree_iterate(), errno is assigned to variable ret
on errors. However, it directly returns 0. It may be better to return
ret. This patch also removes the warning, because the caller already
prints a warning.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188731
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
[ edited subject ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/uuid-tree.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 161342b73ce5..726f928238d0 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -352,7 +352,5 @@ skip:
 
 out:
 	btrfs_free_path(path);
-	if (ret)
-		btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
-	return 0;
+	return ret;
 }

From 1cab2a84f470e15ecc8e5143bfe9398c6e888032 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Date: Tue, 20 Dec 2016 10:29:12 +0000
Subject: [PATCH 014/297] ASoC: wm_adsp: Don't overrun firmware file buffer
 when reading region data

Protect against corrupt firmware files by ensuring that the length we
get for the data in a region actually lies within the available firmware
file data buffer.

Signed-off-by: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/wm_adsp.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 593b7d1aed46..d72ccef9e238 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1551,7 +1551,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 	const struct wmfw_region *region;
 	const struct wm_adsp_region *mem;
 	const char *region_name;
-	char *file, *text;
+	char *file, *text = NULL;
 	struct wm_adsp_buf *buf;
 	unsigned int reg;
 	int regions = 0;
@@ -1700,10 +1700,21 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 			 regions, le32_to_cpu(region->len), offset,
 			 region_name);
 
+		if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
+		    firmware->size) {
+			adsp_err(dsp,
+				 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+				 file, regions, region_name,
+				 le32_to_cpu(region->len), firmware->size);
+			ret = -EINVAL;
+			goto out_fw;
+		}
+
 		if (text) {
 			memcpy(text, region->data, le32_to_cpu(region->len));
 			adsp_info(dsp, "%s: %s\n", file, text);
 			kfree(text);
+			text = NULL;
 		}
 
 		if (reg) {
@@ -1748,6 +1759,7 @@ out_fw:
 	regmap_async_complete(regmap);
 	wm_adsp_buf_free(&buf_list);
 	release_firmware(firmware);
+	kfree(text);
 out:
 	kfree(file);
 
@@ -2233,6 +2245,17 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 		}
 
 		if (reg) {
+			if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
+			    firmware->size) {
+				adsp_err(dsp,
+					 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+					 file, blocks, region_name,
+					 le32_to_cpu(blk->len),
+					 firmware->size);
+				ret = -EINVAL;
+				goto out_fw;
+			}
+
 			buf = wm_adsp_buf_alloc(blk->data,
 						le32_to_cpu(blk->len),
 						&buf_list);

From 7dbbf0fa1bf14c17900bb8057986b06db3822239 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 22 Nov 2016 16:17:50 -0800
Subject: [PATCH 015/297] scsi: scsi-mq: Wait for .queue_rq() if necessary

Ensure that if scsi-mq is enabled that scsi_internal_device_block()
waits until ongoing shost->hostt->queuecommand() calls have finished.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: James Bottomley <jejb@linux.vnet.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Doug Ledford <dledford@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c35b6de4ca64..9fd9a977c695 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2893,7 +2893,7 @@ scsi_internal_device_block(struct scsi_device *sdev)
 	 * request queue. 
 	 */
 	if (q->mq_ops) {
-		blk_mq_stop_hw_queues(q);
+		blk_mq_quiesce_queue(q);
 	} else {
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_stop_queue(q);

From 7961d53d22375bb9e8ae8063533b9059102ed39d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 20 Dec 2016 08:43:30 -0800
Subject: [PATCH 016/297] scsi: qedi: fix build, depends on UIO

Fix build of SCSI qedi driver. It uses uio interfaces so it should
depend on UIO.

ERROR: "uio_unregister_device" [drivers/scsi/qedi/qedi.ko] undefined!
ERROR: "uio_event_notify" [drivers/scsi/qedi/qedi.ko] undefined!
ERROR: "__uio_register_device" [drivers/scsi/qedi/qedi.ko] undefined!

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: QLogic-Storage-Upstream@cavium.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedi/Kconfig b/drivers/scsi/qedi/Kconfig
index 23ca8a274586..21331453db7b 100644
--- a/drivers/scsi/qedi/Kconfig
+++ b/drivers/scsi/qedi/Kconfig
@@ -1,6 +1,6 @@
 config QEDI
 	tristate "QLogic QEDI 25/40/100Gb iSCSI Initiator Driver Support"
-	depends on PCI && SCSI
+	depends on PCI && SCSI && UIO
 	depends on QED
 	select SCSI_ISCSI_ATTRS
 	select QED_LL2

From 6c5d5cfbe3c59429e6d6f66477a7609aacf69751 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 20 Dec 2016 19:14:34 +0800
Subject: [PATCH 017/297] netfilter: ipt_CLUSTERIP: check duplicate config when
 initializing

Now when adding an ipt_CLUSTERIP rule, it only checks duplicate config in
clusterip_config_find_get(). But after that, there may be still another
thread to insert a config with the same ip, then it leaves proc_create_data
to do duplicate check.

It's more reasonable to check duplicate config by ipt_CLUSTERIP itself,
instead of checking it by proc fs duplicate file check. Before, when proc
fs allowed duplicate name files in a directory, It could even crash kernel
because of use-after-free.

This patch is to check duplicate config under the protection of clusterip
net lock when initializing a new config and correct the return err.

Note that it also moves proc file node creation after adding new config, as
proc_create_data may sleep, it couldn't be called under the clusterip_net
lock. clusterip_config_find_get returns NULL if c->pde is null to make sure
it can't be used until the proc file node creation is done.

Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 34 ++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 21db00d0362b..a6b8c1a4102b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -144,7 +144,7 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
 	rcu_read_lock_bh();
 	c = __clusterip_config_find(net, clusterip);
 	if (c) {
-		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+		if (!c->pde || unlikely(!atomic_inc_not_zero(&c->refcount)))
 			c = NULL;
 		else if (entry)
 			atomic_inc(&c->entries);
@@ -166,14 +166,15 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
 
 static struct clusterip_config *
 clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
-			struct net_device *dev)
+		      struct net_device *dev)
 {
+	struct net *net = dev_net(dev);
 	struct clusterip_config *c;
-	struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
+	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
 
 	c = kzalloc(sizeof(*c), GFP_ATOMIC);
 	if (!c)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	c->dev = dev;
 	c->clusterip = ip;
@@ -185,6 +186,17 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 	atomic_set(&c->refcount, 1);
 	atomic_set(&c->entries, 1);
 
+	spin_lock_bh(&cn->lock);
+	if (__clusterip_config_find(net, ip)) {
+		spin_unlock_bh(&cn->lock);
+		kfree(c);
+
+		return ERR_PTR(-EBUSY);
+	}
+
+	list_add_rcu(&c->list, &cn->configs);
+	spin_unlock_bh(&cn->lock);
+
 #ifdef CONFIG_PROC_FS
 	{
 		char buffer[16];
@@ -195,16 +207,16 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 					  cn->procdir,
 					  &clusterip_proc_fops, c);
 		if (!c->pde) {
+			spin_lock_bh(&cn->lock);
+			list_del_rcu(&c->list);
+			spin_unlock_bh(&cn->lock);
 			kfree(c);
-			return NULL;
+
+			return ERR_PTR(-ENOMEM);
 		}
 	}
 #endif
 
-	spin_lock_bh(&cn->lock);
-	list_add_rcu(&c->list, &cn->configs);
-	spin_unlock_bh(&cn->lock);
-
 	return c;
 }
 
@@ -410,9 +422,9 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 
 			config = clusterip_config_init(cipinfo,
 							e->ip.dst.s_addr, dev);
-			if (!config) {
+			if (IS_ERR(config)) {
 				dev_put(dev);
-				return -ENOMEM;
+				return PTR_ERR(config);
 			}
 			dev_mc_add(config->dev, config->clustermac);
 		}

From b6fc513da50c5dbc457a8ad6b58b046a6a68fd9d Mon Sep 17 00:00:00 2001
From: Pavel Rojtberg <rojtberg@gmail.com>
Date: Tue, 27 Dec 2016 11:44:51 -0800
Subject: [PATCH 018/297] Input: xpad - use correct product id for x360w
 controllers

currently the controllers get the same product id as the wireless
receiver. However the controllers actually have their own product id.

The patch makes the driver expose the same product id as the windows
driver.

This improves compatibility when running applications with WINE.

see https://github.com/paroj/xpad/issues/54

Signed-off-by: Pavel Rojtberg <rojtberg@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 6d9499658671..c7d5b2b643d1 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1377,6 +1377,12 @@ static int xpad_init_input(struct usb_xpad *xpad)
 	input_dev->name = xpad->name;
 	input_dev->phys = xpad->phys;
 	usb_to_input_id(xpad->udev, &input_dev->id);
+
+	if (xpad->xtype == XTYPE_XBOX360W) {
+		/* x360w controllers and the receiver have different ids */
+		input_dev->id.product = 0x02a1;
+	}
+
 	input_dev->dev.parent = &xpad->intf->dev;
 
 	input_set_drvdata(input_dev, xpad);

From d7ddad0acc4add42567f7879b116a0b9eea31860 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 27 Dec 2016 11:32:55 -0800
Subject: [PATCH 019/297] Input: synaptics-rmi4 - fix F03 build error when
 serio is module

Since F03 is a boolean, "depends" on symbols that can be modules
do not work quite right. We can enable F03 if SERIO is built-in or
if both RMI core and SERIO core are modules. If SERIO core is module,
but RMI is built-in, we'll get:

drivers/built-in.o: In function `rmi_f03_attention':
rmi_f03.c:(.text+0xf8ef8): undefined reference to `serio_interrupt'
rmi_f03.c:(.text+0xf8fbd): undefined reference to `serio_interrupt'
drivers/built-in.o: In function `rmi_f03_remove':
rmi_f03.c:(.text+0xf9082): undefined reference to `serio_unregister_port'
drivers/built-in.o: In function `rmi_f03_probe':
rmi_f03.c:(.text+0xf9260): undefined reference to `__serio_register_port'

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig
index 30cc627a4f45..8993983e3fe4 100644
--- a/drivers/input/rmi4/Kconfig
+++ b/drivers/input/rmi4/Kconfig
@@ -41,7 +41,8 @@ config RMI4_SMB
 
 config RMI4_F03
         bool "RMI4 Function 03 (PS2 Guest)"
-        depends on RMI4_CORE && SERIO
+	depends on RMI4_CORE
+	depends on SERIO=y || RMI4_CORE=SERIO
         help
           Say Y here if you want to add support for RMI4 function 03.
 

From 9396c9cb0d9534ca67bda8a34b2413a2ca1c67e9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 22 Dec 2016 15:03:50 +0900
Subject: [PATCH 020/297] perf sched timehist: Show total scheduling time

Show length of analyzed sample time and rate of idle task running.
This also takes care of time range given by --time option.

  $ perf sched timehist -sI | tail
  Samples do not have callchains.
  Idle stats:
      CPU  0 idle for    930.316  msec  ( 92.93%)
      CPU  1 idle for    963.614  msec  ( 96.25%)
      CPU  2 idle for    885.482  msec  ( 88.45%)
      CPU  3 idle for    938.635  msec  ( 93.76%)

      Total number of unique tasks: 118
  Total number of context switches: 2337
             Total run time (msec): 3718.048
      Total scheduling time (msec): 1001.131  (x 4)

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20161222060350.17655-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-sched.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d53e706a6f17..5b134b0d1ff3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -209,6 +209,7 @@ struct perf_sched {
 	u64		skipped_samples;
 	const char	*time_str;
 	struct perf_time_interval ptime;
+	struct perf_time_interval hist_time;
 };
 
 /* per thread run time data */
@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool,
 		timehist_print_sample(sched, sample, &al, thread, t);
 
 out:
+	if (sched->hist_time.start == 0 && t >= ptime->start)
+		sched->hist_time.start = t;
+	if (ptime->end == 0 || t <= ptime->end)
+		sched->hist_time.end = t;
+
 	if (tr) {
 		/* time of this sched_switch event becomes last time task seen */
 		tr->last_time = sample->time;
@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 	struct thread *t;
 	struct thread_runtime *r;
 	int i;
+	u64 hist_time = sched->hist_time.end - sched->hist_time.start;
 
 	memset(&totals, 0, sizeof(totals));
 
@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 			totals.sched_count += r->run_stats.n;
 			printf("    CPU %2d idle for ", i);
 			print_sched_time(r->total_run_time, 6);
-			printf(" msec\n");
+			printf(" msec  (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
 		} else
 			printf("    CPU %2d idle entire time window\n", i);
 	}
@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched,
 
 	printf("\n"
 	       "    Total number of unique tasks: %" PRIu64 "\n"
-	       "Total number of context switches: %" PRIu64 "\n"
-	       "           Total run time (msec): ",
+	       "Total number of context switches: %" PRIu64 "\n",
 	       totals.task_count, totals.sched_count);
 
+	printf("           Total run time (msec): ");
 	print_sched_time(totals.total_run_time, 2);
 	printf("\n");
+
+	printf("    Total scheduling time (msec): ");
+	print_sched_time(hist_time, 2);
+	printf(" (x %d)\n", sched->max_cpu);
 }
 
 typedef int (*sched_handler)(struct perf_tool *tool,

From ee12996c9d392ec61241ab6c74d257bc2122e0bc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 27 Dec 2016 21:49:17 -0300
Subject: [PATCH 021/297] samples/bpf sock_example: Avoid getting ethhdr from
 two includes

To avoid the following build failure on Alpine Linux 3.4, that has
clang-3.8 with the bpf target:

    HOSTCC  samples/bpf/sock_example.o
  In file included from /usr/include/net/ethernet.h:10:0,
                   from /git/linux/samples/bpf/sock_example.h:7,
                   from /git/linux/samples/bpf/sock_example.c:30:
  /usr/include/netinet/if_ether.h:96:8: error: redefinition of 'struct
  ethhdr'
   struct ethhdr {
          ^
  In file included from /git/linux/samples/bpf/sock_example.c:26:0:
  ./usr/include/linux/if_ether.h:144:8: note: originally defined here
   struct ethhdr {
          ^
  scripts/Makefile.host:124: recipe for target
  'samples/bpf/sock_example.o' failed
  make[2]: *** [samples/bpf/sock_example.o] Error 1
  /git/linux/Makefile:1658: recipe for target 'samples/bpf/' failed

So include net/if_ether.h for the needs of sock_example.h, using the
same include that sock_example.c uses.

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Joe Stringer <joe@ovn.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-m9avekl1b651qe1r1zd5tzz9@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 samples/bpf/sock_example.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h
index 09f7fe7e5fd7..d8014065d479 100644
--- a/samples/bpf/sock_example.h
+++ b/samples/bpf/sock_example.h
@@ -4,7 +4,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
-#include <net/ethernet.h>
+#include <linux/if_ether.h>
 #include <net/if.h>
 #include <linux/if_packet.h>
 #include <arpa/inet.h>

From abfb7b686a3e5be27bf81db62f9c5c895b76f5d1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 24 Dec 2016 13:59:23 +0000
Subject: [PATCH 022/297] efi/libstub/arm*: Pass latest memory map to the
 kernel

As reported by James Morse, the current libstub code involving the
annotated memory map only works somewhat correctly by accident, due
to the fact that a pool allocation happens to be reused immediately,
retaining its former contents on most implementations of the
UEFI boot services.

Instead of juggling memory maps, which makes the code more complex than
it needs to be, simply put placeholder values into the FDT for the memory
map parameters, and only write the actual values after ExitBootServices()
has been called.

Reported-by: James Morse <james.morse@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org>
Cc: Jeffrey Hugo <jhugo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-efi@vger.kernel.org
Fixes: ed9cc156c42f ("efi/libstub: Use efi_exit_boot_services() in FDT")
Link: http://lkml.kernel.org/r/1482587963-20183-2-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/efistub.h |  8 ---
 drivers/firmware/efi/libstub/fdt.c     | 87 +++++++++++++++++---------
 2 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index b98824e3800a..0e2a96b12cb3 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -39,14 +39,6 @@ efi_status_t efi_file_close(void *handle);
 
 unsigned long get_dram_base(efi_system_table_t *sys_table_arg);
 
-efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
-			unsigned long orig_fdt_size,
-			void *fdt, int new_fdt_size, char *cmdline_ptr,
-			u64 initrd_addr, u64 initrd_size,
-			efi_memory_desc_t *memory_map,
-			unsigned long map_size, unsigned long desc_size,
-			u32 desc_ver);
-
 efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 					    void *handle,
 					    unsigned long *new_fdt_addr,
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index a6a93116a8f0..921dfa047202 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -16,13 +16,10 @@
 
 #include "efistub.h"
 
-efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
-			unsigned long orig_fdt_size,
-			void *fdt, int new_fdt_size, char *cmdline_ptr,
-			u64 initrd_addr, u64 initrd_size,
-			efi_memory_desc_t *memory_map,
-			unsigned long map_size, unsigned long desc_size,
-			u32 desc_ver)
+static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
+			       unsigned long orig_fdt_size,
+			       void *fdt, int new_fdt_size, char *cmdline_ptr,
+			       u64 initrd_addr, u64 initrd_size)
 {
 	int node, num_rsv;
 	int status;
@@ -101,25 +98,23 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map);
+	fdt_val64 = U64_MAX; /* placeholder */
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-start",
 			     &fdt_val64,  sizeof(fdt_val64));
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val32 = cpu_to_fdt32(map_size);
+	fdt_val32 = U32_MAX; /* placeholder */
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-size",
 			     &fdt_val32,  sizeof(fdt_val32));
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val32 = cpu_to_fdt32(desc_size);
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size",
 			     &fdt_val32, sizeof(fdt_val32));
 	if (status)
 		goto fdt_set_fail;
 
-	fdt_val32 = cpu_to_fdt32(desc_ver);
 	status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver",
 			     &fdt_val32, sizeof(fdt_val32));
 	if (status)
@@ -148,6 +143,43 @@ fdt_set_fail:
 	return EFI_LOAD_ERROR;
 }
 
+static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map)
+{
+	int node = fdt_path_offset(fdt, "/chosen");
+	u64 fdt_val64;
+	u32 fdt_val32;
+	int err;
+
+	if (node < 0)
+		return EFI_LOAD_ERROR;
+
+	fdt_val64 = cpu_to_fdt64((unsigned long)*map->map);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-start",
+				  &fdt_val64, sizeof(fdt_val64));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	fdt_val32 = cpu_to_fdt32(*map->map_size);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-size",
+				  &fdt_val32, sizeof(fdt_val32));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	fdt_val32 = cpu_to_fdt32(*map->desc_size);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-size",
+				  &fdt_val32, sizeof(fdt_val32));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	fdt_val32 = cpu_to_fdt32(*map->desc_ver);
+	err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-ver",
+				  &fdt_val32, sizeof(fdt_val32));
+	if (err)
+		return EFI_LOAD_ERROR;
+
+	return EFI_SUCCESS;
+}
+
 #ifndef EFI_FDT_ALIGN
 #define EFI_FDT_ALIGN EFI_PAGE_SIZE
 #endif
@@ -243,20 +275,10 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 			goto fail;
 		}
 
-		/*
-		 * Now that we have done our final memory allocation (and free)
-		 * we can get the memory map key  needed for
-		 * exit_boot_services().
-		 */
-		status = efi_get_memory_map(sys_table, &map);
-		if (status != EFI_SUCCESS)
-			goto fail_free_new_fdt;
-
 		status = update_fdt(sys_table,
 				    (void *)fdt_addr, fdt_size,
 				    (void *)*new_fdt_addr, new_fdt_size,
-				    cmdline_ptr, initrd_addr, initrd_size,
-				    memory_map, map_size, desc_size, desc_ver);
+				    cmdline_ptr, initrd_addr, initrd_size);
 
 		/* Succeeding the first time is the expected case. */
 		if (status == EFI_SUCCESS)
@@ -266,20 +288,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 			/*
 			 * We need to allocate more space for the new
 			 * device tree, so free existing buffer that is
-			 * too small.  Also free memory map, as we will need
-			 * to get new one that reflects the free/alloc we do
-			 * on the device tree buffer.
+			 * too small.
 			 */
 			efi_free(sys_table, new_fdt_size, *new_fdt_addr);
-			sys_table->boottime->free_pool(memory_map);
 			new_fdt_size += EFI_PAGE_SIZE;
 		} else {
 			pr_efi_err(sys_table, "Unable to construct new device tree.\n");
-			goto fail_free_mmap;
+			goto fail_free_new_fdt;
 		}
 	}
 
-	sys_table->boottime->free_pool(memory_map);
 	priv.runtime_map = runtime_map;
 	priv.runtime_entry_count = &runtime_entry_count;
 	status = efi_exit_boot_services(sys_table, handle, &map, &priv,
@@ -288,6 +306,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 	if (status == EFI_SUCCESS) {
 		efi_set_virtual_address_map_t *svam;
 
+		status = update_fdt_memmap((void *)*new_fdt_addr, &map);
+		if (status != EFI_SUCCESS) {
+			/*
+			 * The kernel won't get far without the memory map, but
+			 * may still be able to print something meaningful so
+			 * return success here.
+			 */
+			return EFI_SUCCESS;
+		}
+
 		/* Install the new virtual address map */
 		svam = sys_table->runtime->set_virtual_address_map;
 		status = svam(runtime_entry_count * desc_size, desc_size,
@@ -319,9 +347,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 
 	pr_efi_err(sys_table, "Exit boot services failed.\n");
 
-fail_free_mmap:
-	sys_table->boottime->free_pool(memory_map);
-
 fail_free_new_fdt:
 	efi_free(sys_table, new_fdt_size, *new_fdt_addr);
 

From b6f4c66704b875aba9b8c912532323e3cc89824c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 28 Dec 2016 10:47:13 -0300
Subject: [PATCH 023/297] samples/bpf trace_output_user: Remove duplicate
 sys/ioctl.h include

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Joe Stringer <joe@ovn.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-3awp0nv8tpnblatojmwjww7z@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 samples/bpf/trace_output_user.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index f4fa6af22def..ccca1e348017 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -9,7 +9,6 @@
 #include <string.h>
 #include <fcntl.h>
 #include <poll.h>
-#include <sys/ioctl.h>
 #include <linux/perf_event.h>
 #include <linux/bpf.h>
 #include <errno.h>

From 88b333b0ed790f9433ff542b163bf972953b74d3 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 20 Dec 2016 08:54:29 -0700
Subject: [PATCH 024/297] drm/msm: Ensure that the hardware write pointer is
 valid

Currently the value written to CP_RB_WPTR is calculated on the fly as
(rb->next - rb->start). But as the code is designed rb->next is wrapped
before writing the commands so if a series of commands happened to
fit perfectly in the ringbuffer, rb->next would end up being equal to
rb->size / 4 and thus result in an out of bounds address to CP_RB_WPTR.

The easiest way to fix this is to mask WPTR when writing it to the
hardware; it makes the hardware happy and the rest of the ringbuffer
math appears to work and there isn't any point in upsetting anything.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
[squash in is_power_of_2() check]
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 9 ++++++++-
 drivers/gpu/drm/msm/msm_ringbuffer.c    | 3 ++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index a18126150e11..14ff87686a36 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -213,7 +213,14 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 void adreno_flush(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	uint32_t wptr = get_wptr(gpu->rb);
+	uint32_t wptr;
+
+	/*
+	 * Mask wptr value that we calculate to fit in the HW range. This is
+	 * to account for the possibility that the last command fit exactly into
+	 * the ringbuffer and rb->next hasn't wrapped to zero yet
+	 */
+	wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1);
 
 	/* ensure writes to ringbuffer have hit system memory: */
 	mb();
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index f326cf6a32e6..67b34e069abf 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -23,7 +23,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
 	struct msm_ringbuffer *ring;
 	int ret;
 
-	size = ALIGN(size, 4);   /* size should be dword aligned */
+	if (WARN_ON(!is_power_of_2(size)))
+		return ERR_PTR(-EINVAL);
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 	if (!ring) {

From 6490abc4bc35fa4f3bdb9c7e49096943c50e29ea Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 20 Dec 2016 08:54:30 -0700
Subject: [PATCH 025/297] drm/msm: Put back the vaddr in submit_reloc()

The error cases in submit_reloc() need to put back the virtual
address of the bo before failling. Add a single failure path
for the function.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 166e84e4f0d4..b6411ea6ed17 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -290,7 +290,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 {
 	uint32_t i, last_offset = 0;
 	uint32_t *ptr;
-	int ret;
+	int ret = 0;
 
 	if (offset % 4) {
 		DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset);
@@ -318,12 +318,13 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 
 		ret = copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc));
 		if (ret)
-			return -EFAULT;
+			goto out;
 
 		if (submit_reloc.submit_offset % 4) {
 			DRM_ERROR("non-aligned reloc offset: %u\n",
 					submit_reloc.submit_offset);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 
 		/* offset in dwords: */
@@ -332,12 +333,13 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 		if ((off >= (obj->base.size / 4)) ||
 				(off < last_offset)) {
 			DRM_ERROR("invalid offset %u at reloc %u\n", off, i);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 
 		ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid);
 		if (ret)
-			return ret;
+			goto out;
 
 		if (valid)
 			continue;
@@ -354,9 +356,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 		last_offset = off;
 	}
 
+out:
 	msm_gem_put_vaddr_locked(&obj->base);
 
-	return 0;
+	return ret;
 }
 
 static void submit_cleanup(struct msm_gem_submit *submit)

From a6cb3b864b21b7345f824a4faa12b723c8aaf099 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 20 Dec 2016 08:54:31 -0700
Subject: [PATCH 026/297] drm/msm: Verify that MSM_SUBMIT_BO_FLAGS are set

For every submission buffer object one of MSM_SUBMIT_BO_WRITE
and MSM_SUBMIT_BO_READ must be set (and nothing else). If we
allowed zero then the buffer object would never get queued to
be unreferenced.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index b6411ea6ed17..489676568a10 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -106,7 +106,8 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 			pagefault_disable();
 		}
 
-		if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
+		if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) ||
+			!(submit_bo.flags & MSM_SUBMIT_BO_FLAGS)) {
 			DRM_ERROR("invalid flags: %x\n", submit_bo.flags);
 			ret = -EINVAL;
 			goto out_unlock;

From abc8d5832fd142d565fbfca163348f33b08bc1fe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 16 Dec 2016 10:08:14 +0100
Subject: [PATCH 027/297] gpio: mxs: remove __init annotation

Building with an old toolchain, I ran into this warning:

WARNING: vmlinux.o(.text+0x63eef0): Section mismatch in reference
  from the function mxs_gpio_probe() to the function
  .init.text:mxs_gpio_init_gc()

Clearly the annotation is wrong, since the function is called from the
non-init probe, so let's remove it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-mxs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index 1e8fde8cb803..2292742eac8f 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -205,7 +205,7 @@ static int mxs_gpio_set_wake_irq(struct irq_data *d, unsigned int enable)
 	return 0;
 }
 
-static int __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base)
+static int mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base)
 {
 	struct irq_chip_generic *gc;
 	struct irq_chip_type *ct;

From 5018ada69a04c8ac21d74bd682fceb8e42dc0f96 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 19 Dec 2016 18:29:23 +0100
Subject: [PATCH 028/297] gpio: Move freeing of GPIO hogs before numbing of the
 device

When removing a gpiochip that uses GPIO hogging (e.g. by unloading the
chip's DT overlay), a warning is printed:

    gpio gpiochip8: REMOVING GPIOCHIP WITH GPIOS STILL REQUESTED

This happens because gpiochip_free_hogs() is called after the gdev->chip
pointer is reset to NULL. Hence __gpiod_free() cannot determine the
chip in use, and cannot clear flags nor call the optional chip-specific
.free() callback.

Move the call to gpiochip_free_hogs() up to fix this.

Cc: stable@vger.kernel.org
Fixes: ff2b135922992756 ("gpio: make the gpiochip a real device")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index f4c26c7826cd..86bf3b84ada5 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1317,12 +1317,12 @@ void gpiochip_remove(struct gpio_chip *chip)
 
 	/* FIXME: should the legacy sysfs handling be moved to gpio_device? */
 	gpiochip_sysfs_unregister(gdev);
+	gpiochip_free_hogs(chip);
 	/* Numb the device, cancelling all outstanding operations */
 	gdev->chip = NULL;
 	gpiochip_irqchip_remove(chip);
 	acpi_gpiochip_remove(chip);
 	gpiochip_remove_pin_ranges(chip);
-	gpiochip_free_hogs(chip);
 	of_gpiochip_remove(chip);
 	/*
 	 * We accept no more calls into the driver from this point, so

From 07825f0acd85dd8b7481d5ef0eb024b05364d892 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 29 Dec 2016 17:44:15 +0800
Subject: [PATCH 029/297] crypto: aesni - Fix failure when built-in with
 modular pcbc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If aesni is built-in but pcbc is built as a module, then aesni
will fail completely because when it tries to register the pcbc
variant of aes the pcbc template is not available.

This patch fixes this by modifying the pcbc presence test so that
if aesni is built-in then pcbc must also be built-in for it to be
used by aesni.

Fixes: 85671860caac ("crypto: aesni - Convert to skcipher")
Reported-by: Stephan Müller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 31c34ee131f3..6ef688a1ef3e 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1020,7 +1020,8 @@ struct {
 	const char *basename;
 	struct simd_skcipher_alg *simd;
 } aesni_simd_skciphers2[] = {
-#if IS_ENABLED(CONFIG_CRYPTO_PCBC)
+#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
+    IS_BUILTIN(CONFIG_CRYPTO_PCBC)
 	{
 		.algname	= "pcbc(aes)",
 		.drvname	= "pcbc-aes-aesni",

From 63447646ac657fde00bb658ce21a3431940ae0ad Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Thu, 15 Dec 2016 15:49:56 +0100
Subject: [PATCH 030/297] rpmsg: virtio_rpmsg_bus: fix channel creation

Since commit 4dffed5b3ac796b ("rpmsg: Name rpmsg devices based on
channel id"), it is no more possible for a firmware to register twice
a service (on different endpoints). rpmsg_register_device function
is failing when calling device_add for the second time as second
device has the same name as first one already register.
It is because name is based only on service name and so is not more
unique. Previously name was unique thanks to the use of rpmsg_dev_index.

This patch adds destination and source endpoint numbers device name to
create an unique identifier.

Fixes: 4dffed5b3ac7 ("rpmsg: Name rpmsg devices based on channel id")
Acked-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
[bjorn: flipped name and address in device name]
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/rpmsg/rpmsg_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index a79cb5a9e5f2..1cfb775e8e82 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -453,8 +453,8 @@ int rpmsg_register_device(struct rpmsg_device *rpdev)
 	struct device *dev = &rpdev->dev;
 	int ret;
 
-	dev_set_name(&rpdev->dev, "%s:%s",
-		     dev_name(dev->parent), rpdev->id.name);
+	dev_set_name(&rpdev->dev, "%s.%s.%d.%d", dev_name(dev->parent),
+		     rpdev->id.name, rpdev->src, rpdev->dst);
 
 	rpdev->dev.bus = &rpmsg_bus;
 	rpdev->dev.release = rpmsg_release_device;

From c81c0e0710f031cb09eb7cbf0e75e6754d1d8346 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Wed, 14 Dec 2016 16:11:00 +0100
Subject: [PATCH 031/297] remoteproc: fix vdev reference management

Commit 2b45cef5868a ("remoteproc: Further extend the vdev life cycle")
extends kref support for vdev management.
It introduces a regression when following sequence is executed:
rproc_boot --> rproc_shutdown --> rproc_boot
Second rproc_boot call crashes on register_virtio_device as device
is already existing.
Issue is previous vdev is never released when rproc is stop because
associated refcount is too high.

kref_get introduces is not needed as kref_init already initializes
krefcount to 1 because it considers associated variable as used.
This introduces a misalignment between kref_get and kref_put calls.

Fixes: 2b45cef5868a ("remoteproc: Further extend the vdev life cycle")
Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 9a507e77eced..feb24c43d4c7 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -396,9 +396,6 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 			goto unwind_vring_allocations;
 	}
 
-	/* track the rvdevs list reference */
-	kref_get(&rvdev->refcount);
-
 	list_add_tail(&rvdev->node, &rproc->rvdevs);
 
 	rproc_add_subdev(rproc, &rvdev->subdev,

From a0c10687ec9506b5e14fe3dd47832a77f2f2500c Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 30 Dec 2016 03:21:38 -0800
Subject: [PATCH 032/297] Revert "remoteproc: Merge table_ptr and cached_table
 pointers"

Following any fw_rsc_vdev entries in the resource table are two variable
length arrays, the first one reference vring resources and the second
one is the virtio config space.  The virtio config space is used by
virtio to communicate status and configuration changes and must as such
be shared with the remote.

The reverted commit incorrectly made any changes to the virtio config
space only affect the local copy, in an attempt to allowing memory
protection of the shared resource table.

This reverts commit cda8529346935fc86f476999ac4fbfe4e17abf11.
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 26 ++++++++++++++++----------
 include/linux/remoteproc.h           |  4 +++-
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index feb24c43d4c7..90b05c72186c 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -886,13 +886,15 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 	/*
 	 * Create a copy of the resource table. When a virtio device starts
 	 * and calls vring_new_virtqueue() the address of the allocated vring
-	 * will be stored in the table_ptr. Before the device is started,
-	 * table_ptr will be copied into device memory.
+	 * will be stored in the cached_table. Before the device is started,
+	 * cached_table will be copied into device memory.
 	 */
-	rproc->table_ptr = kmemdup(table, tablesz, GFP_KERNEL);
-	if (!rproc->table_ptr)
+	rproc->cached_table = kmemdup(table, tablesz, GFP_KERNEL);
+	if (!rproc->cached_table)
 		goto clean_up;
 
+	rproc->table_ptr = rproc->cached_table;
+
 	/* reset max_notifyid */
 	rproc->max_notifyid = -1;
 
@@ -911,16 +913,18 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 	}
 
 	/*
-	 * The starting device has been given the rproc->table_ptr as the
+	 * The starting device has been given the rproc->cached_table as the
 	 * resource table. The address of the vring along with the other
-	 * allocated resources (carveouts etc) is stored in table_ptr.
+	 * allocated resources (carveouts etc) is stored in cached_table.
 	 * In order to pass this information to the remote device we must copy
 	 * this information to device memory. We also update the table_ptr so
 	 * that any subsequent changes will be applied to the loaded version.
 	 */
 	loaded_table = rproc_find_loaded_rsc_table(rproc, fw);
-	if (loaded_table)
-		memcpy(loaded_table, rproc->table_ptr, tablesz);
+	if (loaded_table) {
+		memcpy(loaded_table, rproc->cached_table, tablesz);
+		rproc->table_ptr = loaded_table;
+	}
 
 	/* power up the remote processor */
 	ret = rproc->ops->start(rproc);
@@ -948,7 +952,8 @@ stop_rproc:
 clean_up_resources:
 	rproc_resource_cleanup(rproc);
 clean_up:
-	kfree(rproc->table_ptr);
+	kfree(rproc->cached_table);
+	rproc->cached_table = NULL;
 	rproc->table_ptr = NULL;
 
 	rproc_disable_iommu(rproc);
@@ -1182,7 +1187,8 @@ void rproc_shutdown(struct rproc *rproc)
 	rproc_disable_iommu(rproc);
 
 	/* Free the copy of the resource table */
-	kfree(rproc->table_ptr);
+	kfree(rproc->cached_table);
+	rproc->cached_table = NULL;
 	rproc->table_ptr = NULL;
 
 	/* if in crash state, unlock crash handler */
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index e2f3a3281d8f..8265d351c9f0 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -408,7 +408,8 @@ enum rproc_crash_type {
  * @crash_comp: completion used to sync crash handler and the rproc reload
  * @recovery_disabled: flag that state if recovery was disabled
  * @max_notifyid: largest allocated notify id.
- * @table_ptr: our copy of the resource table
+ * @table_ptr: pointer to the resource table in effect
+ * @cached_table: copy of the resource table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
  */
 struct rproc {
@@ -440,6 +441,7 @@ struct rproc {
 	bool recovery_disabled;
 	int max_notifyid;
 	struct resource_table *table_ptr;
+	struct resource_table *cached_table;
 	bool has_iommu;
 	bool auto_boot;
 };

From 570b90fa230b8021f51a67fab2245fe8df6fe37d Mon Sep 17 00:00:00 2001
From: Andrew Lutomirski <luto@kernel.org>
Date: Mon, 12 Dec 2016 12:55:55 -0800
Subject: [PATCH 033/297] orinoco: Use shash instead of ahash for MIC
 calculations

Eric Biggers pointed out that the orinoco driver pointed scatterlists
at the stack.

Fix it by switching from ahash to shash.  The result should be
simpler, faster, and more correct.

kvalo: cherry picked from commit 1fef293b8a9850cfa124a53c1d8878d355010403 as I
accidentally applied this patch to wireless-drivers-next when I was supposed to
apply this wireless-drivers

Cc: stable@vger.kernel.org # 4.9 only
Reported-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/intersil/orinoco/mic.c   | 44 +++++++++++--------
 drivers/net/wireless/intersil/orinoco/mic.h   |  3 +-
 .../net/wireless/intersil/orinoco/orinoco.h   |  4 +-
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/net/wireless/intersil/orinoco/mic.c b/drivers/net/wireless/intersil/orinoco/mic.c
index bc7397d709d3..08bc7822f820 100644
--- a/drivers/net/wireless/intersil/orinoco/mic.c
+++ b/drivers/net/wireless/intersil/orinoco/mic.c
@@ -16,7 +16,7 @@
 /********************************************************************/
 int orinoco_mic_init(struct orinoco_private *priv)
 {
-	priv->tx_tfm_mic = crypto_alloc_ahash("michael_mic", 0,
+	priv->tx_tfm_mic = crypto_alloc_shash("michael_mic", 0,
 					      CRYPTO_ALG_ASYNC);
 	if (IS_ERR(priv->tx_tfm_mic)) {
 		printk(KERN_DEBUG "orinoco_mic_init: could not allocate "
@@ -25,7 +25,7 @@ int orinoco_mic_init(struct orinoco_private *priv)
 		return -ENOMEM;
 	}
 
-	priv->rx_tfm_mic = crypto_alloc_ahash("michael_mic", 0,
+	priv->rx_tfm_mic = crypto_alloc_shash("michael_mic", 0,
 					      CRYPTO_ALG_ASYNC);
 	if (IS_ERR(priv->rx_tfm_mic)) {
 		printk(KERN_DEBUG "orinoco_mic_init: could not allocate "
@@ -40,17 +40,16 @@ int orinoco_mic_init(struct orinoco_private *priv)
 void orinoco_mic_free(struct orinoco_private *priv)
 {
 	if (priv->tx_tfm_mic)
-		crypto_free_ahash(priv->tx_tfm_mic);
+		crypto_free_shash(priv->tx_tfm_mic);
 	if (priv->rx_tfm_mic)
-		crypto_free_ahash(priv->rx_tfm_mic);
+		crypto_free_shash(priv->rx_tfm_mic);
 }
 
-int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key,
+int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key,
 		u8 *da, u8 *sa, u8 priority,
 		u8 *data, size_t data_len, u8 *mic)
 {
-	AHASH_REQUEST_ON_STACK(req, tfm_michael);
-	struct scatterlist sg[2];
+	SHASH_DESC_ON_STACK(desc, tfm_michael);
 	u8 hdr[ETH_HLEN + 2]; /* size of header + padding */
 	int err;
 
@@ -67,18 +66,27 @@ int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key,
 	hdr[ETH_ALEN * 2 + 2] = 0;
 	hdr[ETH_ALEN * 2 + 3] = 0;
 
-	/* Use scatter gather to MIC header and data in one go */
-	sg_init_table(sg, 2);
-	sg_set_buf(&sg[0], hdr, sizeof(hdr));
-	sg_set_buf(&sg[1], data, data_len);
+	desc->tfm = tfm_michael;
+	desc->flags = 0;
 
-	if (crypto_ahash_setkey(tfm_michael, key, MIC_KEYLEN))
-		return -1;
+	err = crypto_shash_setkey(tfm_michael, key, MIC_KEYLEN);
+	if (err)
+		return err;
+
+	err = crypto_shash_init(desc);
+	if (err)
+		return err;
+
+	err = crypto_shash_update(desc, hdr, sizeof(hdr));
+	if (err)
+		return err;
+
+	err = crypto_shash_update(desc, data, data_len);
+	if (err)
+		return err;
+
+	err = crypto_shash_final(desc, mic);
+	shash_desc_zero(desc);
 
-	ahash_request_set_tfm(req, tfm_michael);
-	ahash_request_set_callback(req, 0, NULL, NULL);
-	ahash_request_set_crypt(req, sg, mic, data_len + sizeof(hdr));
-	err = crypto_ahash_digest(req);
-	ahash_request_zero(req);
 	return err;
 }
diff --git a/drivers/net/wireless/intersil/orinoco/mic.h b/drivers/net/wireless/intersil/orinoco/mic.h
index ce731d05cc98..e8724e889219 100644
--- a/drivers/net/wireless/intersil/orinoco/mic.h
+++ b/drivers/net/wireless/intersil/orinoco/mic.h
@@ -6,6 +6,7 @@
 #define _ORINOCO_MIC_H_
 
 #include <linux/types.h>
+#include <crypto/hash.h>
 
 #define MICHAEL_MIC_LEN 8
 
@@ -15,7 +16,7 @@ struct crypto_ahash;
 
 int orinoco_mic_init(struct orinoco_private *priv);
 void orinoco_mic_free(struct orinoco_private *priv);
-int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key,
+int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key,
 		u8 *da, u8 *sa, u8 priority,
 		u8 *data, size_t data_len, u8 *mic);
 
diff --git a/drivers/net/wireless/intersil/orinoco/orinoco.h b/drivers/net/wireless/intersil/orinoco/orinoco.h
index 2f0c84b1c440..5fa1c3e3713f 100644
--- a/drivers/net/wireless/intersil/orinoco/orinoco.h
+++ b/drivers/net/wireless/intersil/orinoco/orinoco.h
@@ -152,8 +152,8 @@ struct orinoco_private {
 	u8 *wpa_ie;
 	int wpa_ie_len;
 
-	struct crypto_ahash *rx_tfm_mic;
-	struct crypto_ahash *tx_tfm_mic;
+	struct crypto_shash *rx_tfm_mic;
+	struct crypto_shash *tx_tfm_mic;
 
 	unsigned int wpa_enabled:1;
 	unsigned int tkip_cm_active:1;

From 60f59ce0278557f7896d5158ae6d12a4855a72cc Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Wed, 21 Dec 2016 11:18:55 -0600
Subject: [PATCH 034/297] rtlwifi: rtl_usb: Fix missing entry in USB driver's
 private data

These drivers need to be able to reference "struct ieee80211_hw" from
the driver's private data, and vice versa. The USB driver failed to
store the address of ieee80211_hw in the private data. Although this
bug has been present for a long time, it was not exposed until
commit ba9f93f82aba ("rtlwifi: Fix enter/exit power_save").

Fixes: ba9f93f82aba ("rtlwifi: Fix enter/exit power_save")
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 0a508649903d..49015b05f3d1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -1063,6 +1063,7 @@ int rtl_usb_probe(struct usb_interface *intf,
 		return -ENOMEM;
 	}
 	rtlpriv = hw->priv;
+	rtlpriv->hw = hw;
 	rtlpriv->usb_data = kzalloc(RTL_USB_MAX_RX_COUNT * sizeof(u32),
 				    GFP_KERNEL);
 	if (!rtlpriv->usb_data)

From 14221cc45caad2fcab3a8543234bb7eda9b540d5 Mon Sep 17 00:00:00 2001
From: Artur Molchanov <arturmolchanov@gmail.com>
Date: Fri, 30 Dec 2016 19:46:36 +0300
Subject: [PATCH 035/297] bridge: netfilter: Fix dropping packets that moving
 through bridge interface

Problem:
br_nf_pre_routing_finish() calls itself instead of
br_nf_pre_routing_finish_bridge(). Due to this bug reverse path filter drops
packets that go through bridge interface.

User impact:
Local docker containers with bridge network can not communicate with each
other.

Fixes: c5136b15ea36 ("netfilter: bridge: add and use br_nf_hook_thresh")
Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter_hooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index b12501a77f18..135cc8ab813c 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -399,7 +399,7 @@ bridged_dnat:
 				br_nf_hook_thresh(NF_BR_PRE_ROUTING,
 						  net, sk, skb, skb->dev,
 						  NULL,
-						  br_nf_pre_routing_finish);
+						  br_nf_pre_routing_finish_bridge);
 				return 0;
 			}
 			ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);

From d2e3a1358c37cd82eef92b5e908b4f0472194481 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Thu, 29 Dec 2016 14:11:21 +0100
Subject: [PATCH 036/297] ASoC: Fix binding and probing of auxiliary components

Currently binding of auxiliary devices doesn't work as in
soc_bind_aux_dev() function a bound component is not being added
to any list and in soc_probe_aux_devices() we are trying to walk
the component_dev_list list to probe auxiliary components but
at that time this list doesn't contain any auxiliary components
since they are being added to the card only in soc_probe_component().

This patch adds a list to the card where are stored bound but not
probed auxiliary devices, so that all aux devices can be probed.

Fixes: 1a653aa44725 "ASoC: core: replace aux_comp_list to component_dev_list"
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  3 +++
 sound/soc/soc-core.c | 10 +++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 2b502f6cc6d0..b86168a21d56 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -813,6 +813,7 @@ struct snd_soc_component {
 	unsigned int suspended:1; /* is in suspend PM state */
 
 	struct list_head list;
+	struct list_head card_aux_list; /* for auxiliary bound components */
 	struct list_head card_list;
 
 	struct snd_soc_dai_driver *dai_drv;
@@ -1152,6 +1153,7 @@ struct snd_soc_card {
 	 */
 	struct snd_soc_aux_dev *aux_dev;
 	int num_aux_devs;
+	struct list_head aux_comp_list;
 
 	const struct snd_kcontrol_new *controls;
 	int num_controls;
@@ -1547,6 +1549,7 @@ static inline void snd_soc_initialize_card_lists(struct snd_soc_card *card)
 	INIT_LIST_HEAD(&card->widgets);
 	INIT_LIST_HEAD(&card->paths);
 	INIT_LIST_HEAD(&card->dapm_list);
+	INIT_LIST_HEAD(&card->aux_comp_list);
 	INIT_LIST_HEAD(&card->component_dev_list);
 }
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index f1901bb1466e..baa1afa41e3d 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1748,6 +1748,7 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num)
 
 	component->init = aux_dev->init;
 	component->auxiliary = 1;
+	list_add(&component->card_aux_list, &card->aux_comp_list);
 
 	return 0;
 
@@ -1758,16 +1759,14 @@ err_defer:
 
 static int soc_probe_aux_devices(struct snd_soc_card *card)
 {
-	struct snd_soc_component *comp;
+	struct snd_soc_component *comp, *tmp;
 	int order;
 	int ret;
 
 	for (order = SND_SOC_COMP_ORDER_FIRST; order <= SND_SOC_COMP_ORDER_LAST;
 		order++) {
-		list_for_each_entry(comp, &card->component_dev_list, card_list) {
-			if (!comp->auxiliary)
-				continue;
-
+		list_for_each_entry_safe(comp, tmp, &card->aux_comp_list,
+					 card_aux_list) {
 			if (comp->driver->probe_order == order) {
 				ret = soc_probe_component(card,	comp);
 				if (ret < 0) {
@@ -1776,6 +1775,7 @@ static int soc_probe_aux_devices(struct snd_soc_card *card)
 						comp->name, ret);
 					return ret;
 				}
+				list_del(&comp->card_aux_list);
 			}
 		}
 	}

From 63c3194b82530bd71fd49db84eb7ab656b8d404a Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 23 Dec 2016 11:21:10 +0200
Subject: [PATCH 037/297] ASoC: tlv320aic3x: Mark the RESET register as
 volatile

The RESET register only have one self clearing bit and it should not be
cached. If it is cached, when we sync the registers back to the chip we
will initiate a software reset as well, which is not desirable.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/tlv320aic3x.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index 8877b74b0510..bb94d50052d7 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -126,6 +126,16 @@ static const struct reg_default aic3x_reg[] = {
 	{ 108, 0x00 }, { 109, 0x00 },
 };
 
+static bool aic3x_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AIC3X_RESET:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static const struct regmap_config aic3x_regmap = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -133,6 +143,9 @@ static const struct regmap_config aic3x_regmap = {
 	.max_register = DAC_ICC_ADJ,
 	.reg_defaults = aic3x_reg,
 	.num_reg_defaults = ARRAY_SIZE(aic3x_reg),
+
+	.volatile_reg = aic3x_volatile_reg,
+
 	.cache_type = REGCACHE_RBTREE,
 };
 

From 91ce54978ccece323aa6df930249ff84a7d233c7 Mon Sep 17 00:00:00 2001
From: G Kranthi <gudishax.kranthikumar@intel.com>
Date: Tue, 20 Dec 2016 12:46:45 +0530
Subject: [PATCH 038/297] ASoC: Intel: Skylake: Fix to fail safely if module
 not available in path

If a module is not available in a pipeline, fail safely rather than
causing oops.

Signed-off-by: G Kranthi <gudishax.kranthikumar@intel.com>
Signed-off-by: Subhransu S. Prusty <subhransu.s.prusty@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/skylake/skl-pcm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index 84b5101e6ca6..6c6b63a6b338 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -180,6 +180,9 @@ static int skl_pcm_open(struct snd_pcm_substream *substream,
 	snd_pcm_set_sync(substream);
 
 	mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream);
+	if (!mconfig)
+		return -EINVAL;
+
 	skl_tplg_d0i3_get(skl, mconfig->d0i3_caps);
 
 	return 0;

From a33b56a6a824fa5cd89c74f85cbeb9af1dcef87e Mon Sep 17 00:00:00 2001
From: John Hsu <KCHSU0@nuvoton.com>
Date: Tue, 20 Dec 2016 16:47:06 +0800
Subject: [PATCH 039/297] ASoC: nau8825: correct the function name of register

Change to correct name of the register function.

Signed-off-by: John Hsu <KCHSU0@nuvoton.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/nau8825.c | 6 +++---
 sound/soc/codecs/nau8825.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index efe3a44658d5..abf77dd422f4 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -561,9 +561,9 @@ static void nau8825_xtalk_prepare(struct nau8825 *nau8825)
 	nau8825_xtalk_backup(nau8825);
 	/* Config IIS as master to output signal by codec */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2,
-		NAU8825_I2S_MS_MASK | NAU8825_I2S_DRV_MASK |
+		NAU8825_I2S_MS_MASK | NAU8825_I2S_LRC_DIV_MASK |
 		NAU8825_I2S_BLK_DIV_MASK, NAU8825_I2S_MS_MASTER |
-		(0x2 << NAU8825_I2S_DRV_SFT) | 0x1);
+		(0x2 << NAU8825_I2S_LRC_DIV_SFT) | 0x1);
 	/* Ramp up headphone volume to 0dB to get better performance and
 	 * avoid pop noise in headphone.
 	 */
@@ -657,7 +657,7 @@ static void nau8825_xtalk_clean(struct nau8825 *nau8825)
 		NAU8825_IRQ_RMS_EN, NAU8825_IRQ_RMS_EN);
 	/* Recover default value for IIS */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2,
-		NAU8825_I2S_MS_MASK | NAU8825_I2S_DRV_MASK |
+		NAU8825_I2S_MS_MASK | NAU8825_I2S_LRC_DIV_MASK |
 		NAU8825_I2S_BLK_DIV_MASK, NAU8825_I2S_MS_SLAVE);
 	/* Restore value of specific register for cross talk */
 	nau8825_xtalk_restore(nau8825);
diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h
index 5d1704e73241..b6b21b312854 100644
--- a/sound/soc/codecs/nau8825.h
+++ b/sound/soc/codecs/nau8825.h
@@ -247,8 +247,8 @@
 
 /* I2S_PCM_CTRL2 (0x1d) */
 #define NAU8825_I2S_TRISTATE	(1 << 15) /* 0 - normal mode, 1 - Hi-Z output */
-#define NAU8825_I2S_DRV_SFT	12
-#define NAU8825_I2S_DRV_MASK	(0x3 << NAU8825_I2S_DRV_SFT)
+#define NAU8825_I2S_LRC_DIV_SFT	12
+#define NAU8825_I2S_LRC_DIV_MASK	(0x3 << NAU8825_I2S_LRC_DIV_SFT)
 #define NAU8825_I2S_MS_SFT	3
 #define NAU8825_I2S_MS_MASK	(1 << NAU8825_I2S_MS_SFT)
 #define NAU8825_I2S_MS_MASTER	(1 << NAU8825_I2S_MS_SFT)

From a1792cda51300e15b03549cccf0b09f3be82e697 Mon Sep 17 00:00:00 2001
From: John Hsu <KCHSU0@nuvoton.com>
Date: Tue, 20 Dec 2016 12:03:09 +0800
Subject: [PATCH 040/297] ASoC: nau8825: fix invalid configuration in
 Pre-Scalar of FLL

The clk_ref_div is not configured in the correct position of the
register. The patch fixes that clk_ref_div, Pre-Scalar, is assigned
the wrong value.

Signed-off-by: John Hsu <KCHSU0@nuvoton.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/nau8825.c | 3 ++-
 sound/soc/codecs/nau8825.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index abf77dd422f4..4576f987a4a5 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -2006,7 +2006,8 @@ static void nau8825_fll_apply(struct nau8825 *nau8825,
 			NAU8825_FLL_INTEGER_MASK, fll_param->fll_int);
 	/* FLL pre-scaler */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL4,
-			NAU8825_FLL_REF_DIV_MASK, fll_param->clk_ref_div);
+			NAU8825_FLL_REF_DIV_MASK,
+			fll_param->clk_ref_div << NAU8825_FLL_REF_DIV_SFT);
 	/* select divided VCO input */
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_FLL5,
 		NAU8825_FLL_CLK_SW_MASK, NAU8825_FLL_CLK_SW_REF);
diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h
index b6b21b312854..514fd13c2f46 100644
--- a/sound/soc/codecs/nau8825.h
+++ b/sound/soc/codecs/nau8825.h
@@ -137,7 +137,8 @@
 #define NAU8825_FLL_CLK_SRC_FS			(0x3 << NAU8825_FLL_CLK_SRC_SFT)
 
 /* FLL4 (0x07) */
-#define NAU8825_FLL_REF_DIV_MASK		(0x3 << 10)
+#define NAU8825_FLL_REF_DIV_SFT	10
+#define NAU8825_FLL_REF_DIV_MASK	(0x3 << NAU8825_FLL_REF_DIV_SFT)
 
 /* FLL5 (0x08) */
 #define NAU8825_FLL_PDB_DAC_EN		(0x1 << 15)

From 1594c18fd297a8edcc72bc4b161f3f52603ebb92 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 13 Dec 2016 11:15:21 -0700
Subject: [PATCH 041/297] dmaengine: ioatdma: Add Skylake PCI Dev ID

Adding Skylake Xeon PCI device ids for ioatdma and related bits.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ioat/hw.h   | 2 ++
 drivers/dma/ioat/init.c | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 8e67895bcca3..abcc51b343ce 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -64,6 +64,8 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_BDX8	0x6f2e
 #define PCI_DEVICE_ID_INTEL_IOAT_BDX9	0x6f2f
 
+#define PCI_DEVICE_ID_INTEL_IOAT_SKX	0x2021
+
 #define IOAT_VER_1_2            0x12    /* Version 1.2 */
 #define IOAT_VER_2_0            0x20    /* Version 2.0 */
 #define IOAT_VER_3_0            0x30    /* Version 3.0 */
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 90eddd9f07e4..51b2b643ba71 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -106,6 +106,8 @@ static struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) },
 
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) },
+
 	/* I/OAT v3.3 platforms */
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) },
@@ -243,10 +245,15 @@ static bool is_bdx_ioat(struct pci_dev *pdev)
 	}
 }
 
+static inline bool is_skx_ioat(struct pci_dev *pdev)
+{
+	return (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX) ? true : false;
+}
+
 static bool is_xeon_cb32(struct pci_dev *pdev)
 {
 	return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
-		is_hsw_ioat(pdev) || is_bdx_ioat(pdev);
+		is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev);
 }
 
 bool is_bwd_ioat(struct pci_dev *pdev)

From 34a31f0af84158955a9747fb5c6712da5bbb5331 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 13 Dec 2016 11:15:27 -0700
Subject: [PATCH 042/297] dmaengine: ioatdma: workaround SKX ioatdma version

The Skylake ioatdma is technically CBDMA 3.2+ and contains the same hardware
bits with some additional 3.3 features, but it's not really 3.3 where the
driver is concerned.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ioat/init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 51b2b643ba71..ace5cb2cb12f 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -1364,6 +1364,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
 	if (device->version >= IOAT_VER_3_0) {
+		if (is_skx_ioat(pdev))
+			device->version = IOAT_VER_3_2;
 		err = ioat3_dma_probe(device, ioat_dca_enabled);
 
 		if (device->version >= IOAT_VER_3_3)

From 1032471b3ec823bce7687034ac5af78a8ac99a9c Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 15 Dec 2016 11:43:30 +0100
Subject: [PATCH 043/297] dmaengine: dw: fix typo in Kconfig

platfroms -> platforms

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: fed42c198b45 ("dma: dw: add PCI part of the driver")
Cc: Viresh Kumar <vireshk@kernel.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dw/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig
index e00c9b022964..5a37b9fcf40d 100644
--- a/drivers/dma/dw/Kconfig
+++ b/drivers/dma/dw/Kconfig
@@ -24,5 +24,5 @@ config DW_DMAC_PCI
 	select DW_DMAC_CORE
 	help
 	  Support the Synopsys DesignWare AHB DMA controller on the
-	  platfroms that enumerate it as a PCI device. For example,
+	  platforms that enumerate it as a PCI device. For example,
 	  Intel Medfield has integrated this GPDMA controller.

From 7e96304d99477de1f70db42035071e56439da817 Mon Sep 17 00:00:00 2001
From: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Date: Tue, 13 Dec 2016 14:40:43 +0100
Subject: [PATCH 044/297] dmaengine: stm32-dma: Set correct args number for DMA
 request from DT

This patch sets the right number of arguments to be used for DMA clients
which request channels from DT.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/stm32-dma.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 3688d0873a3e..a884b857cba4 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -972,21 +972,18 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 	struct stm32_dma_chan *chan;
 	struct dma_chan *c;
 
-	if (dma_spec->args_count < 3)
+	if (dma_spec->args_count < 4)
 		return NULL;
 
 	cfg.channel_id = dma_spec->args[0];
 	cfg.request_line = dma_spec->args[1];
 	cfg.stream_config = dma_spec->args[2];
-	cfg.threshold = 0;
+	cfg.threshold = dma_spec->args[3];
 
 	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >=
 				STM32_DMA_MAX_REQUEST_ID))
 		return NULL;
 
-	if (dma_spec->args_count > 3)
-		cfg.threshold = dma_spec->args[3];
-
 	chan = &dmadev->chan[cfg.channel_id];
 
 	c = dma_get_slave_channel(&chan->vchan.chan);

From 57b5a32135c813f2ab669039fb4ec16b30cb3305 Mon Sep 17 00:00:00 2001
From: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Date: Tue, 13 Dec 2016 14:40:46 +0100
Subject: [PATCH 045/297] dmaengine: stm32-dma: Fix null pointer dereference in
 stm32_dma_tx_status

chan->desc is always set to NULL when a DMA transfer is complete.
As a DMA transfer could be complete during the call of stm32_dma_tx_status,
we need to be sure that chan->desc is not NULL before using this variable
to avoid a null pointer deference issue.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/stm32-dma.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index a884b857cba4..3056ce7f8c69 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -880,7 +880,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 	struct virt_dma_desc *vdesc;
 	enum dma_status status;
 	unsigned long flags;
-	u32 residue;
+	u32 residue = 0;
 
 	status = dma_cookie_status(c, cookie, state);
 	if ((status == DMA_COMPLETE) || (!state))
@@ -888,16 +888,12 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
 	vdesc = vchan_find_desc(&chan->vchan, cookie);
-	if (cookie == chan->desc->vdesc.tx.cookie) {
+	if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
 		residue = stm32_dma_desc_residue(chan, chan->desc,
 						 chan->next_sg);
-	} else if (vdesc) {
+	else if (vdesc)
 		residue = stm32_dma_desc_residue(chan,
 						 to_stm32_dma_desc(vdesc), 0);
-	} else {
-		residue = 0;
-	}
-
 	dma_set_residue(state, residue);
 
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);

From 75bdc7f31a3a6e9a12e218b31a44a1f54a91554c Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 19 Dec 2016 06:33:51 +0100
Subject: [PATCH 046/297] dmaengine: ti-dma-crossbar: Add some 'of_node_put()'
 in error path.

Add some missing 'of_node_put()' in early exit error path.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ti-dma-crossbar.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
index 3f24aeb48c0e..2403475a37cf 100644
--- a/drivers/dma/ti-dma-crossbar.c
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -149,6 +149,7 @@ static int ti_am335x_xbar_probe(struct platform_device *pdev)
 	match = of_match_node(ti_am335x_master_match, dma_node);
 	if (!match) {
 		dev_err(&pdev->dev, "DMA master is not supported\n");
+		of_node_put(dma_node);
 		return -EINVAL;
 	}
 
@@ -339,6 +340,7 @@ static int ti_dra7_xbar_probe(struct platform_device *pdev)
 	match = of_match_node(ti_dra7_master_match, dma_node);
 	if (!match) {
 		dev_err(&pdev->dev, "DMA master is not supported\n");
+		of_node_put(dma_node);
 		return -EINVAL;
 	}
 

From 1f2ed153b916c95a49a1ca9d7107738664224b7f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 3 Jan 2017 00:20:49 +0900
Subject: [PATCH 047/297] perf probe: Fix to get correct modname from elf
 header

Since 'perf probe' supports cross-arch probes, it is possible to analyze
different arch kernel image which has different bits-per-long.

In that case, it fails to get the module name because it uses the
MOD_NAME_OFFSET macro based on the host machine bits-per-long, instead
of the target arch bits-per-long.

This fixes above issue by changing modname-offset based on the target
archs bit width. This is ok because linux kernel uses LP64 model on
64bit arch.

E.g. without this (on x86_64, and target module is arm32):

  $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup
  p:probe/configfs_lookup :configfs_lookup+0
                          ^-Here is an empty module name.

With this fix, you can see correct module name:

  $ perf probe -m build-arm/fs/configfs/configfs.ko -D configfs_lookup
  p:probe/configfs_lookup configfs:configfs_lookup+0

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/148337043836.6752.383495516397005695.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d281ae2b54e8..8f810961ec78 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -267,21 +267,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
 	return true;
 }
 
-/*
- * NOTE:
- * '.gnu.linkonce.this_module' section of kernel module elf directly
- * maps to 'struct module' from linux/module.h. This section contains
- * actual module name which will be used by kernel after loading it.
- * But, we cannot use 'struct module' here since linux/module.h is not
- * exposed to user-space. Offset of 'name' has remained same from long
- * time, so hardcoding it here.
- */
-#ifdef __LP64__
-#define MOD_NAME_OFFSET 24
-#else
-#define MOD_NAME_OFFSET 12
-#endif
-
 /*
  * @module can be module name of module file path. In case of path,
  * inspect elf and find out what is actual module name.
@@ -296,6 +281,7 @@ static char *find_module_name(const char *module)
 	Elf_Data *data;
 	Elf_Scn *sec;
 	char *mod_name = NULL;
+	int name_offset;
 
 	fd = open(module, O_RDONLY);
 	if (fd < 0)
@@ -317,7 +303,21 @@ static char *find_module_name(const char *module)
 	if (!data || !data->d_buf)
 		goto ret_err;
 
-	mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
+	/*
+	 * NOTE:
+	 * '.gnu.linkonce.this_module' section of kernel module elf directly
+	 * maps to 'struct module' from linux/module.h. This section contains
+	 * actual module name which will be used by kernel after loading it.
+	 * But, we cannot use 'struct module' here since linux/module.h is not
+	 * exposed to user-space. Offset of 'name' has remained same from long
+	 * time, so hardcoding it here.
+	 */
+	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+		name_offset = 12;
+	else	/* expect ELFCLASS64 by default */
+		name_offset = 24;
+
+	mod_name = strdup((char *)data->d_buf + name_offset);
 
 ret_err:
 	elf_end(elf);

From 836c3ce2566fb8c1754f8d7c9534cad9bc8a6879 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 2 Jan 2017 12:07:37 +0200
Subject: [PATCH 048/297] dmaengine: omap-dma: Fix dynamic lch_map allocation

The original patch did not done what it was supposed to be doing and even
worst it broke legacy boot (OMAP1).

The lch_map size should be the number of available logical channels in sDMA
and the od->dma_requests should store the number of available DMA request
lines usable in sDMA.

In legacy mode we do not have a way to get the DMA request count, in that
case we use OMAP_SDMA_REQUESTS (127), despite the fact that OMAP1510 have
only 31 DMA request line.

Fixes: 2d1a9a946fae ("dmaengine: omap-dma: Dynamically allocate memory for lch_map")
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: stable@vger.kernel.org   # v4.9
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/omap-dma.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index ac68666cd3f4..4ad101a47e0a 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -1452,6 +1452,7 @@ static int omap_dma_probe(struct platform_device *pdev)
 	struct omap_dmadev *od;
 	struct resource *res;
 	int rc, i, irq;
+	u32 lch_count;
 
 	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
 	if (!od)
@@ -1494,20 +1495,31 @@ static int omap_dma_probe(struct platform_device *pdev)
 	spin_lock_init(&od->lock);
 	spin_lock_init(&od->irq_lock);
 
-	if (!pdev->dev.of_node) {
-		od->dma_requests = od->plat->dma_attr->lch_count;
-		if (unlikely(!od->dma_requests))
-			od->dma_requests = OMAP_SDMA_REQUESTS;
-	} else if (of_property_read_u32(pdev->dev.of_node, "dma-requests",
-					&od->dma_requests)) {
+	/* Number of DMA requests */
+	od->dma_requests = OMAP_SDMA_REQUESTS;
+	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+						      "dma-requests",
+						      &od->dma_requests)) {
 		dev_info(&pdev->dev,
 			 "Missing dma-requests property, using %u.\n",
 			 OMAP_SDMA_REQUESTS);
-		od->dma_requests = OMAP_SDMA_REQUESTS;
 	}
 
-	od->lch_map = devm_kcalloc(&pdev->dev, od->dma_requests,
-				   sizeof(*od->lch_map), GFP_KERNEL);
+	/* Number of available logical channels */
+	if (!pdev->dev.of_node) {
+		lch_count = od->plat->dma_attr->lch_count;
+		if (unlikely(!lch_count))
+			lch_count = OMAP_SDMA_CHANNELS;
+	} else if (of_property_read_u32(pdev->dev.of_node, "dma-channels",
+					&lch_count)) {
+		dev_info(&pdev->dev,
+			 "Missing dma-channels property, using %u.\n",
+			 OMAP_SDMA_CHANNELS);
+		lch_count = OMAP_SDMA_CHANNELS;
+	}
+
+	od->lch_map = devm_kcalloc(&pdev->dev, lch_count, sizeof(*od->lch_map),
+				   GFP_KERNEL);
 	if (!od->lch_map)
 		return -ENOMEM;
 

From f53243b563e8966fb5a5cd8f27d48b832d3b1c43 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Mon, 2 Jan 2017 17:42:08 +0100
Subject: [PATCH 049/297] MAINTAINERS: dmaengine: Update + Hand over the
 at_hdmac driver to Ludovic

Hand over the Microchip / Atmel DMA driver handled by at_hdmac driver
to Ludovic who is responsible for the newer at_xdmac driver as well.
Also update the entry name and position to follow company changes.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 MAINTAINERS | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cfff2c9e3d94..c10150853273 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2194,14 +2194,6 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
 F:	sound/soc/atmel
 
-ATMEL DMA DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Supported
-F:	drivers/dma/at_hdmac.c
-F:	drivers/dma/at_hdmac_regs.h
-F:	include/linux/platform_data/dma-atmel.h
-
 ATMEL XDMA DRIVER
 M:	Ludovic Desroches <ludovic.desroches@atmel.com>
 L:	linux-arm-kernel@lists.infradead.org
@@ -8174,6 +8166,15 @@ S:	Maintained
 F:	drivers/tty/serial/atmel_serial.c
 F:	include/linux/atmel_serial.h
 
+MICROCHIP / ATMEL DMA DRIVER
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	dmaengine@vger.kernel.org
+S:	Supported
+F:	drivers/dma/at_hdmac.c
+F:	drivers/dma/at_hdmac_regs.h
+F:	include/linux/platform_data/dma-atmel.h
+
 MICROCHIP / ATMEL ISC DRIVER
 M:	Songjun Wu <songjun.wu@microchip.com>
 L:	linux-media@vger.kernel.org

From 5c9e6c2b2ba3ec3a442e2fb5b4286498f8b4dcb7 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 16 Dec 2016 11:39:11 +0100
Subject: [PATCH 050/297] dmaengine: pl330: Fix runtime PM support for
 terminated transfers

PL330 DMA engine driver is leaking a runtime reference after any terminated
DMA transactions. This patch fixes this issue by tracking runtime PM state
of the device and making additional call to pm_runtime_put() in terminate_all
callback if needed.

Fixes: ae43b3289186 ("ARM: 8202/1: dmaengine: pl330: Add runtime Power Management support v12")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/pl330.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 87fd01539fcb..740bbb942594 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -448,6 +448,9 @@ struct dma_pl330_chan {
 
 	/* for cyclic capability */
 	bool cyclic;
+
+	/* for runtime pm tracking */
+	bool active;
 };
 
 struct pl330_dmac {
@@ -2033,6 +2036,7 @@ static void pl330_tasklet(unsigned long data)
 		_stop(pch->thread);
 		spin_unlock(&pch->thread->dmac->lock);
 		power_down = true;
+		pch->active = false;
 	} else {
 		/* Make sure the PL330 Channel thread is active */
 		spin_lock(&pch->thread->dmac->lock);
@@ -2052,6 +2056,7 @@ static void pl330_tasklet(unsigned long data)
 			desc->status = PREP;
 			list_move_tail(&desc->node, &pch->work_list);
 			if (power_down) {
+				pch->active = true;
 				spin_lock(&pch->thread->dmac->lock);
 				_start(pch->thread);
 				spin_unlock(&pch->thread->dmac->lock);
@@ -2166,6 +2171,7 @@ static int pl330_terminate_all(struct dma_chan *chan)
 	unsigned long flags;
 	struct pl330_dmac *pl330 = pch->dmac;
 	LIST_HEAD(list);
+	bool power_down = false;
 
 	pm_runtime_get_sync(pl330->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
@@ -2176,6 +2182,8 @@ static int pl330_terminate_all(struct dma_chan *chan)
 	pch->thread->req[0].desc = NULL;
 	pch->thread->req[1].desc = NULL;
 	pch->thread->req_running = -1;
+	power_down = pch->active;
+	pch->active = false;
 
 	/* Mark all desc done */
 	list_for_each_entry(desc, &pch->submitted_list, node) {
@@ -2193,6 +2201,8 @@ static int pl330_terminate_all(struct dma_chan *chan)
 	list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
 	spin_unlock_irqrestore(&pch->lock, flags);
 	pm_runtime_mark_last_busy(pl330->ddma.dev);
+	if (power_down)
+		pm_runtime_put_autosuspend(pl330->ddma.dev);
 	pm_runtime_put_autosuspend(pl330->ddma.dev);
 
 	return 0;
@@ -2357,6 +2367,7 @@ static void pl330_issue_pending(struct dma_chan *chan)
 		 * updated on work_list emptiness status.
 		 */
 		WARN_ON(list_empty(&pch->submitted_list));
+		pch->active = true;
 		pm_runtime_get_sync(pch->dmac->ddma.dev);
 	}
 	list_splice_tail_init(&pch->submitted_list, &pch->work_list);

From dcafc45dcb6d8bb6d159ed0a903bd0f3de597fac Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 2 Jan 2017 16:09:59 +0100
Subject: [PATCH 051/297] drm/meson: Fix plane atomic check when no crtc for
 the plane

When no CRTC is associated with the plane, the meson_plane_atomic_check()
call breaks the kernel with an Oops.

Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller")
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
---
 drivers/gpu/drm/meson/meson_plane.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
index 4942ca090b46..7890e30eb584 100644
--- a/drivers/gpu/drm/meson/meson_plane.c
+++ b/drivers/gpu/drm/meson/meson_plane.c
@@ -51,6 +51,9 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
 	struct drm_crtc_state *crtc_state;
 	struct drm_rect clip = { 0, };
 
+	if (!state->crtc)
+		return 0;
+
 	crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc);
 	if (IS_ERR(crtc_state))
 		return PTR_ERR(crtc_state);

From b66fb1da5a8cac3f5c3cdbe41937c91efc4e76a4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 3 Jan 2017 09:19:54 +0100
Subject: [PATCH 052/297] tools lib subcmd: Add OPT_STRING_OPTARG_SET option

To allow string options with a default argument and variable set when
the option is used.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1483431600-19887-2-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/subcmd/parse-options.c | 3 +++
 tools/lib/subcmd/parse-options.h | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 3284bb14ae78..8aad81151d50 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p,
 		else
 			err = get_arg(p, opt, flags, (const char **)opt->value);
 
+		if (opt->set)
+			*(bool *)opt->set = true;
+
 		/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
 		if (opt->flags & PARSE_OPT_NOEMPTY) {
 			const char *val = *(const char **)opt->value;
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 8866ac438b34..11c3be3bcce7 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -137,6 +137,11 @@ struct option {
 	{ .type = OPTION_STRING,  .short_name = (s), .long_name = (l), \
 	  .value = check_vtype(v, const char **), (a), .help = (h), \
 	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
+#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
+	{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+	  .value = check_vtype(v, const char **), (a), .help = (h), \
+	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
+	  .set = check_vtype(os, bool *)}
 #define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
 #define OPT_DATE(s, l, v, h) \
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }

From efd21307119d5a23ac83ae8fd5a39a5c7aeb8493 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 3 Jan 2017 09:19:55 +0100
Subject: [PATCH 053/297] perf record: Make __record_options static

There's no need for this one to be global.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1483431600-19887-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 74d6a035133a..31cf0ce12a65 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1405,7 +1405,7 @@ static bool dry_run;
  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
  * using pipes, etc.
  */
-struct option __record_options[] = {
+static struct option __record_options[] = {
 	OPT_CALLBACK('e', "event", &record.evlist, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events_option),

From 60437ac02f398e0ee0927748d4798dd5534ac90d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 3 Jan 2017 09:19:56 +0100
Subject: [PATCH 054/297] perf record: Fix --switch-output documentation and
 comment

There's no --signal-trigger option, also adding the code comment into
record man page.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1483431600-19887-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt | 4 ++++
 tools/perf/builtin-record.c              | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 27fc3617c6a4..5054d9147f0f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that
 particular perf.data snapshot should be kept or not.
 
 Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+The reason for the latter two is to reduce the data file switching
+overhead. You can still switch them on with:
+
+  --switch-output --no-no-buildid  --no-no-buildid-cache
 
 --dry-run::
 Parse options then exit. --dry-run can be used to detect errors in cmdline
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 31cf0ce12a65..4ec10e9427d9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 		 * overhead. Still generate buildid if they are required
 		 * explicitly using
 		 *
-		 *  perf record --signal-trigger --no-no-buildid \
+		 *  perf record --switch-output --no-no-buildid \
 		 *              --no-no-buildid-cache
 		 *
 		 * Following code equals to:

From aa7c8da35d1905d80e840d075f07d26ec90144b5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 20 Dec 2016 13:28:27 -0500
Subject: [PATCH 055/297] btrfs: fix error handling when run_delayed_extent_op
 fails

In __btrfs_run_delayed_refs, the error path when run_delayed_extent_op
fails sets locked_ref->processing = 0 but doesn't re-increment
delayed_refs->num_heads_ready.  As a result, we end up triggering
the WARN_ON in btrfs_select_ref_head.

Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads)
Reported-by: Jon Nelson <jnelson-suse@jamponi.net>
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e97302f437a1..5366e50c84c6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2572,7 +2572,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 					 */
 					if (must_insert_reserved)
 						locked_ref->must_insert_reserved = 1;
+					spin_lock(&delayed_refs->lock);
 					locked_ref->processing = 0;
+					delayed_refs->num_heads_ready++;
+					spin_unlock(&delayed_refs->lock);
 					btrfs_debug(fs_info,
 						    "run_delayed_extent_op returned %d",
 						    ret);

From d0280996437081dd12ed1e982ac8aeaa62835ec4 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 20 Dec 2016 13:28:28 -0500
Subject: [PATCH 056/297] btrfs: fix locking when we put back a delayed ref
 that's too new

In __btrfs_run_delayed_refs, when we put back a delayed ref that's too
new, we have already dropped the lock on locked_ref when we set
->processing = 0.

This patch keeps the lock to cover that assignment.

Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads)
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5366e50c84c6..ac7e6713033c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2522,11 +2522,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		if (ref && ref->seq &&
 		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
 			spin_unlock(&locked_ref->lock);
-			btrfs_delayed_ref_unlock(locked_ref);
 			spin_lock(&delayed_refs->lock);
 			locked_ref->processing = 0;
 			delayed_refs->num_heads_ready++;
 			spin_unlock(&delayed_refs->lock);
+			btrfs_delayed_ref_unlock(locked_ref);
 			locked_ref = NULL;
 			cond_resched();
 			count++;

From e321f8a801d7b4c40da8005257b05b9c2b51b072 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 30 Nov 2016 16:11:04 -0800
Subject: [PATCH 057/297] Btrfs: use down_read_nested to make lockdep silent

If @block_group is not @used_bg, it'll try to get @used_bg's lock without
droping @block_group 's lock and lockdep has throwed a scary deadlock warning
about it.
Fix it by using down_read_nested.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ac7e6713033c..dcd2e798767e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7387,7 +7387,8 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
 
 		spin_unlock(&cluster->refill_lock);
 
-		down_read(&used_bg->data_rwsem);
+		/* We should only have one-level nested. */
+		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
 
 		spin_lock(&cluster->refill_lock);
 		if (used_bg == cluster->block_group)

From 781feef7e6befafd4d9787d1f7ada1f9ccd504e4 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 30 Nov 2016 16:20:25 -0800
Subject: [PATCH 058/297] Btrfs: fix lockdep warning about log_mutex

While checking INODE_REF/INODE_EXTREF for a corner case, we may acquire a
different inode's log_mutex with holding the current inode's log_mutex, and
lockdep has complained this with a possilble deadlock warning.

Fix this by using mutex_lock_nested() when processing the other inode's
log_mutex.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f10bf5213ed8..eeffff84f280 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -37,6 +37,7 @@
  */
 #define LOG_INODE_ALL 0
 #define LOG_INODE_EXISTS 1
+#define LOG_OTHER_INODE 2
 
 /*
  * directory trouble cases
@@ -4641,7 +4642,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	if (S_ISDIR(inode->i_mode) ||
 	    (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 		       &BTRFS_I(inode)->runtime_flags) &&
-	     inode_only == LOG_INODE_EXISTS))
+	     inode_only >= LOG_INODE_EXISTS))
 		max_key.type = BTRFS_XATTR_ITEM_KEY;
 	else
 		max_key.type = (u8)-1;
@@ -4665,7 +4666,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	mutex_lock(&BTRFS_I(inode)->log_mutex);
+	if (inode_only == LOG_OTHER_INODE) {
+		inode_only = LOG_INODE_EXISTS;
+		mutex_lock_nested(&BTRFS_I(inode)->log_mutex,
+				  SINGLE_DEPTH_NESTING);
+	} else {
+		mutex_lock(&BTRFS_I(inode)->log_mutex);
+	}
 
 	/*
 	 * a brute force approach to making sure we get the most uptodate
@@ -4817,7 +4824,7 @@ again:
 				 * unpin it.
 				 */
 				err = btrfs_log_inode(trans, root, other_inode,
-						      LOG_INODE_EXISTS,
+						      LOG_OTHER_INODE,
 						      0, LLONG_MAX, ctx);
 				iput(other_inode);
 				if (err)

From c2931667c83ded6504b3857e99cc45b21fa496fb Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 22 Dec 2016 17:13:54 -0800
Subject: [PATCH 059/297] Btrfs: adjust outstanding_extents counter properly
 when dio write is split

Currently how btrfs dio deals with split dio write is not good
enough if dio write is split into several segments due to the
lack of contiguous space, a large dio write like 'dd bs=1G count=1'
can end up with incorrect outstanding_extents counter and endio
would complain loudly with an assertion.

This fixes the problem by compensating the outstanding_extents
counter in inode if a large dio write gets split.

Reported-by: Anand Jain <anand.jain@oracle.com>
Tested-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a713d9d324b0..81b9d9d0450c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7623,11 +7623,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
 	 * within our reservation, otherwise we need to adjust our inode
 	 * counter appropriately.
 	 */
-	if (dio_data->outstanding_extents) {
+	if (dio_data->outstanding_extents >= num_extents) {
 		dio_data->outstanding_extents -= num_extents;
 	} else {
+		/*
+		 * If dio write length has been split due to no large enough
+		 * contiguous space, we need to compensate our inode counter
+		 * appropriately.
+		 */
+		u64 num_needed = num_extents - dio_data->outstanding_extents;
+
 		spin_lock(&BTRFS_I(inode)->lock);
-		BTRFS_I(inode)->outstanding_extents += num_extents;
+		BTRFS_I(inode)->outstanding_extents += num_needed;
 		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 }

From 3b046a97cbd35a73e1eef968dbfb1a0aac745a77 Mon Sep 17 00:00:00 2001
From: Robert LeBlanc <robert@leblancnet.us>
Date: Mon, 5 Dec 2016 13:02:57 -0700
Subject: [PATCH 060/297] md/raid1: Refactor raid1_make_request

Refactor raid1_make_request to make read and write code in their own
functions to clean up the code.

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid1.c | 267 +++++++++++++++++++++++----------------------
 1 file changed, 139 insertions(+), 128 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a1f3fbed9100..14422407e520 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1066,17 +1066,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+				 struct r1bio *r1_bio)
 {
 	struct r1conf *conf = mddev->private;
 	struct raid1_info *mirror;
-	struct r1bio *r1_bio;
 	struct bio *read_bio;
+	struct bitmap *bitmap = mddev->bitmap;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	int rdisk;
+
+	wait_barrier(conf, bio);
+
+read_again:
+	rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+	if (rdisk < 0) {
+		/* couldn't find anywhere to read from */
+		raid_end_bio_io(r1_bio);
+		return;
+	}
+	mirror = conf->mirrors + rdisk;
+
+	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+	    bitmap) {
+		/*
+		 * Reading from a write-mostly device must take care not to
+		 * over-take any writes that are 'behind'
+		 */
+		raid1_log(mddev, "wait behind writes");
+		wait_event(bitmap->behind_wait,
+			   atomic_read(&bitmap->behind_writes) == 0);
+	}
+	r1_bio->read_disk = rdisk;
+	r1_bio->start_next_window = 0;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r1_bio->bios[rdisk] = read_bio;
+
+	read_bio->bi_iter.bi_sector = r1_bio->sector +
+		mirror->rdev->data_offset;
+	read_bio->bi_bdev = mirror->rdev->bdev;
+	read_bio->bi_end_io = raid1_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &mirror->rdev->flags) &&
+	    test_bit(R1BIO_FailFast, &r1_bio->state))
+	        read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r1_bio;
+
+	if (mddev->gendisk)
+	        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+	                              read_bio, disk_devt(mddev->gendisk),
+	                              r1_bio->sector);
+
+	if (max_sectors < r1_bio->sectors) {
+		/*
+		 * could not read all from this device, so we will need another
+		 * r1_bio.
+		 */
+		sectors_handled = (r1_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r1_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __make_request and subsequent mempool_alloc might
+		 * block waiting for it.  So hand bio over to raid1d.
+		 */
+		reschedule_retry(r1_bio);
+
+		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+		r1_bio->master_bio = bio;
+		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r1_bio->state = 0;
+		r1_bio->mddev = mddev;
+		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+				struct r1bio *r1_bio)
+{
+	struct r1conf *conf = mddev->private;
 	int i, disks;
-	struct bitmap *bitmap;
+	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_opf &
 						(REQ_PREFLUSH | REQ_FUA));
@@ -1096,15 +1186,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	if (bio_data_dir(bio) == WRITE &&
-	    ((bio_end_sector(bio) > mddev->suspend_lo &&
+	if ((bio_end_sector(bio) > mddev->suspend_lo &&
 	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
 	    (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-		     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
-		/* As the suspend_* range is controlled by
-		 * userspace, we want an interruptible
-		 * wait.
+		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+
+		/*
+		 * As the suspend_* range is controlled by userspace, we want
+		 * an interruptible wait.
 		 */
 		DEFINE_WAIT(w);
 		for (;;) {
@@ -1115,128 +1205,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
 			    (mddev_is_clustered(mddev) &&
 			     !md_cluster_ops->area_resyncing(mddev, WRITE,
-				     bio->bi_iter.bi_sector, bio_end_sector(bio))))
+				     bio->bi_iter.bi_sector,
+				     bio_end_sector(bio))))
 				break;
 			schedule();
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
-
 	start_next_window = wait_barrier(conf, bio);
 
-	bitmap = mddev->bitmap;
-
-	/*
-	 * make_request() can abort the operation when read-ahead is being
-	 * used and no empty request is available.
-	 *
-	 */
-	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-	r1_bio->master_bio = bio;
-	r1_bio->sectors = bio_sectors(bio);
-	r1_bio->state = 0;
-	r1_bio->mddev = mddev;
-	r1_bio->sector = bio->bi_iter.bi_sector;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r1_bio and no locking
-	 * will be needed when requests complete.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		int rdisk;
-
-read_again:
-		rdisk = read_balance(conf, r1_bio, &max_sectors);
-
-		if (rdisk < 0) {
-			/* couldn't find anywhere to read from */
-			raid_end_bio_io(r1_bio);
-			return;
-		}
-		mirror = conf->mirrors + rdisk;
-
-		if (test_bit(WriteMostly, &mirror->rdev->flags) &&
-		    bitmap) {
-			/* Reading from a write-mostly device must
-			 * take care not to over-take any writes
-			 * that are 'behind'
-			 */
-			raid1_log(mddev, "wait behind writes");
-			wait_event(bitmap->behind_wait,
-				   atomic_read(&bitmap->behind_writes) == 0);
-		}
-		r1_bio->read_disk = rdisk;
-		r1_bio->start_next_window = 0;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r1_bio->bios[rdisk] = read_bio;
-
-		read_bio->bi_iter.bi_sector = r1_bio->sector +
-			mirror->rdev->data_offset;
-		read_bio->bi_bdev = mirror->rdev->bdev;
-		read_bio->bi_end_io = raid1_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &mirror->rdev->flags) &&
-		    test_bit(R1BIO_FailFast, &r1_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r1_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r1_bio->sector);
-
-		if (max_sectors < r1_bio->sectors) {
-			/* could not read all from this device, so we will
-			 * need another r1_bio.
-			 */
-
-			sectors_handled = (r1_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r1_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __make_request
-			 * and subsequent mempool_alloc might block waiting
-			 * for it.  So hand bio over to raid1d.
-			 */
-			reschedule_retry(r1_bio);
-
-			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-			r1_bio->master_bio = bio;
-			r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r1_bio->state = 0;
-			r1_bio->mddev = mddev;
-			r1_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid1_log(mddev, "wait queued");
@@ -1280,8 +1257,7 @@ read_again:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, r1_bio->sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* mustn't write here until the bad block is
@@ -1370,7 +1346,8 @@ read_again:
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+			 max_sectors);
 
 		if (first_clone) {
 			/* do behind I/O ?
@@ -1464,6 +1441,40 @@ read_again:
 	wake_up(&conf->wait_barrier);
 }
 
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r1conf *conf = mddev->private;
+	struct r1bio *r1_bio;
+
+	/*
+	 * make_request() can abort the operation when read-ahead is being
+	 * used and no empty request is available.
+	 *
+	 */
+	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+	r1_bio->master_bio = bio;
+	r1_bio->sectors = bio_sectors(bio);
+	r1_bio->state = 0;
+	r1_bio->mddev = mddev;
+	r1_bio->sector = bio->bi_iter.bi_sector;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments.  If this is 0, there is only one r1_bio and
+	 * no locking will be needed when requests complete.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid1_read_request(mddev, bio, r1_bio);
+	else
+		raid1_write_request(mddev, bio, r1_bio);
+}
+
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r1conf *conf = mddev->private;

From bb5f1ed70bc3bbbce510907da3432dab267ff508 Mon Sep 17 00:00:00 2001
From: Robert LeBlanc <robert@leblancnet.us>
Date: Mon, 5 Dec 2016 13:02:58 -0700
Subject: [PATCH 061/297] md/raid10: Refactor raid10_make_request

Refactor raid10_make_request into seperate read and write functions to
clean up the code.

Shaohua: add the recovery check back to read path

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 245 +++++++++++++++++++++++++-------------------
 1 file changed, 140 insertions(+), 105 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab5e86209322..1920756828df 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	sector_t sectors;
+	struct md_rdev *rdev;
+	int slot;
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
+		 */
+		raid10_log(conf->mddev, "wait reshape");
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+
+read_again:
+	rdev = read_balance(conf, r10_bio, &max_sectors);
+	if (!rdev) {
+		raid_end_bio_io(r10_bio);
+		return;
+	}
+	slot = r10_bio->read_slot;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r10_bio->devs[slot].bio = read_bio;
+	r10_bio->devs[slot].rdev = rdev;
+
+	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+		choose_data_offset(r10_bio, rdev);
+	read_bio->bi_bdev = rdev->bdev;
+	read_bio->bi_end_io = raid10_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &rdev->flags) &&
+	    test_bit(R10BIO_FailFast, &r10_bio->state))
+	        read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r10_bio;
+
+	if (mddev->gendisk)
+	        trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+	                              read_bio, disk_devt(mddev->gendisk),
+	                              r10_bio->sector);
+	if (max_sectors < r10_bio->sectors) {
+		/*
+		 * Could not read all from this device, so we will need another
+		 * r10_bio.
+		 */
+		sectors_handled = (r10_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __generic_make_request and subsequent
+		 * mempool_alloc might block waiting for it.  so hand bio over
+		 * to raid10d.
+		 */
+		reschedule_retry(r10_bio);
+
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r10_bio->state = 0;
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+	return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
 	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
 	struct blk_plug_cb *cb;
 	struct raid10_plug_cb *plug = NULL;
+	sector_t sectors;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
 
 	md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
 	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position.  Need to wait for
-		 * reshape to pass
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
 		 */
 		raid10_log(conf->mddev, "wait reshape");
 		allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			   sectors);
 		wait_barrier(conf);
 	}
+
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
 	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
 		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		conf->reshape_safe = mddev->reshape_position;
 	}
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
-
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		struct md_rdev *rdev;
-		int slot;
-
-read_again:
-		rdev = read_balance(conf, r10_bio, &max_sectors);
-		if (!rdev) {
-			raid_end_bio_io(r10_bio);
-			return;
-		}
-		slot = r10_bio->read_slot;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r10_bio->devs[slot].bio = read_bio;
-		r10_bio->devs[slot].rdev = rdev;
-
-		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-			choose_data_offset(r10_bio, rdev);
-		read_bio->bi_bdev = rdev->bdev;
-		read_bio->bi_end_io = raid10_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &rdev->flags) &&
-		    test_bit(R10BIO_FailFast, &r10_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r10_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r10_bio->sector);
-		if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
-			 */
-			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r10_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it.  so hand bio over to raid10d.
-			 */
-			reschedule_retry(r10_bio);
-
-			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-			r10_bio->master_bio = bio;
-			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r10_bio->state = 0;
-			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						   r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
 	one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = bio_sectors(bio);
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments.  If this is 0, there is only one r10_bio and
+	 * no locking will be needed when the request completes.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid10_read_request(mddev, bio, r10_bio);
+	else
+		raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;

From 074859184d770824f4437dca716bdeb625ae8b1c Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@redhat.com>
Date: Tue, 3 Jan 2017 12:42:42 +0100
Subject: [PATCH 062/297] tools lib traceevent: Fix prev/next_prio for deadline
 tasks

Currently, the sched:sched_switch tracepoint reports deadline tasks with
priority -1. But when reading the trace via perf script I've got the
following output:

  # ./d & # (d is a deadline task, see [1])
  # perf record -e sched:sched_switch -a sleep 1
  # perf script
      ...
         swapper     0 [000]  2146.962441: sched:sched_switch: swapper/0:0 [120] R ==> d:2593 [4294967295]
               d  2593 [000]  2146.972472: sched:sched_switch: d:2593 [4294967295] R ==> g:2590 [4294967295]

The task d reports the wrong priority [4294967295]. This happens because
the "int prio" is stored in an unsigned long long val. Although it is
set as a %lld, as int is shorter than unsigned long long,
trace_seq_printf prints it as a positive number.

The fix is just to cast the val as an int, and print it as a %d,
as in the sched:sched_switch tracepoint's "format".

The output with the fix is:

  # ./d &
  # perf record -e sched:sched_switch -a sleep 1
  # perf script
      ...
         swapper     0 [000]  4306.374037: sched:sched_switch: swapper/0:0 [120] R ==> d:10941 [-1]
               d 10941 [000]  4306.383823: sched:sched_switch: d:10941 [-1] R ==> swapper/0:0 [120]

[1] d.c

 ---
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/types.h>
  #include <linux/sched.h>

  struct sched_attr {
	__u32 size, sched_policy;
	__u64 sched_flags;
	__s32 sched_nice;
	__u32 sched_priority;
	__u64 sched_runtime, sched_deadline, sched_period;
  };

  int sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags)
  {
	return syscall(__NR_sched_setattr, pid, attr, flags);
  }

  int main(void)
  {
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE, /* This creates a 10ms/30ms reservation */
		.sched_runtime	= 10 * 1000 * 1000,
		.sched_period	= attr.sched_deadline = 30 * 1000 * 1000,
	};

	if (sched_setattr(0, &attr, 0) < 0) {
		perror("sched_setattr");
		return -1;
	}

	for(;;);
  }
 ---

Committer notes:

Got the program from the provided URL, http://bristot.me/lkml/d.c,
trimmed it and included in the cset log above, so that we have
everything needed to test it in one place.

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/866ef75bcebf670ae91c6a96daa63597ba981f0d.1483443552.git.bristot@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/plugin_sched_switch.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
index f1ce60065258..ec30c2fcbac0 100644
--- a/tools/lib/traceevent/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s,
 	trace_seq_printf(s, "%lld ", val);
 
 	if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, "[%lld] ", val);
+		trace_seq_printf(s, "[%d] ", (int) val);
 
 	if (pevent_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
 		write_state(s, val);
@@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s,
 	trace_seq_printf(s, "%lld", val);
 
 	if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, " [%lld]", val);
+		trace_seq_printf(s, " [%d]", (int) val);
 
 	return 0;
 }

From 30a9c6444810429aa2b7cbfbd453ce339baaadbf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Jan 2017 12:03:59 -0300
Subject: [PATCH 063/297] perf tools: Install tools/lib/traceevent plugins with
 install-bin

Those are binaries as well, so should be installed by:

  make -C tools/perf install-bin'

too.

Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-3841b37u05evxrs1igkyu6ks@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index e9ec531131ca..4db68aec9913 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -704,9 +704,9 @@ install-tests: all install-gtk
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
 		$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
-install-bin: install-tools install-tests
+install-bin: install-tools install-tests install-traceevent-plugins
 
-install: install-bin try-install-man install-traceevent-plugins
+install: install-bin try-install-man
 
 install-python_ext:
 	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'

From 7934c98a6e04028eb34c1293bfb5a6b0ab630b66 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Jan 2017 15:19:21 -0300
Subject: [PATCH 064/297] perf symbols: Robustify reading of build-id from
 sysfs

Markus reported that perf segfaults when reading /sys/kernel/notes from
a kernel linked with GNU gold, due to what looks like a gold bug, so do
some bounds checking to avoid crashing in that case.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Report-Link: http://lkml.kernel.org/r/20161219161821.GA294@x4
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-ryhgs6a6jxvz207j2636w31c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol-elf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 99400b0e8f2a..adbc6c02c3aa 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
 				break;
 		} else {
 			int n = namesz + descsz;
+
+			if (n > (int)sizeof(bf)) {
+				n = sizeof(bf);
+				pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
+					 __func__, filename, nhdr.n_namesz, nhdr.n_descsz);
+			}
 			if (read(fd, bf, n) != n)
 				break;
 		}

From 47e3a5edc6538d66e470aaed3b7c57255cb37ca1 Mon Sep 17 00:00:00 2001
From: Paul Donohue <linux-kernel@PaulSD.com>
Date: Tue, 3 Jan 2017 10:39:28 -0800
Subject: [PATCH 065/297] Input: ALPS - fix TrackStick Y axis handling for SS5
 hardware

A minus character was lost in commit 23fce365, causing the Y axis to be
inverted for SS5 TrackStick events.  (Pushing the TrackStick up caused
the pointer to move down, and vice versa.)  Restore the lost minus.

Fixes: 23fce365c6a2 ("Input: ALPS - clean up code for SS5 hardware")
Signed-off-by: Paul Donohue <linux-kernel@PaulSD.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index cde6f4bd8ea2..6d279aa27cb9 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -114,7 +114,7 @@ enum SS4_PACKET_ID {
 				 (_b[1] & 0x7F)		\
 				)
 
-#define SS4_TS_Y_V2(_b)		(s8)(				\
+#define SS4_TS_Y_V2(_b)		-(s8)(				\
 				 ((_b[3] & 0x01) << 7) |	\
 				 (_b[2] & 0x7F)		\
 				)

From 01427fe7c4b956b878e55e966690624a3624e991 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 3 Jan 2017 11:51:48 -0800
Subject: [PATCH 066/297] Input: adxl34x - make it enumerable in ACPI
 environment

The ACPI-enabled platform may contain _DSD method to enable this driver
using compatible string.

Remove OF specifics to re-use existing code on ACPI-enabled platforms.

Suggested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/adxl34x-i2c.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c
index a8b0a2eec344..7fed92fb8cc1 100644
--- a/drivers/input/misc/adxl34x-i2c.c
+++ b/drivers/input/misc/adxl34x-i2c.c
@@ -136,7 +136,6 @@ static const struct i2c_device_id adxl34x_id[] = {
 
 MODULE_DEVICE_TABLE(i2c, adxl34x_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id adxl34x_of_id[] = {
 	/*
 	 * The ADXL346 is backward-compatible with the ADXL345. Differences are
@@ -153,13 +152,12 @@ static const struct of_device_id adxl34x_of_id[] = {
 };
 
 MODULE_DEVICE_TABLE(of, adxl34x_of_id);
-#endif
 
 static struct i2c_driver adxl34x_driver = {
 	.driver = {
 		.name = "adxl34x",
 		.pm = &adxl34x_i2c_pm,
-		.of_match_table = of_match_ptr(adxl34x_of_id),
+		.of_match_table = adxl34x_of_id,
 	},
 	.probe    = adxl34x_i2c_probe,
 	.remove   = adxl34x_i2c_remove,

From f97fd383d9a10fd125bcdafba03240685aed5608 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Mon, 19 Dec 2016 15:47:14 +0100
Subject: [PATCH 067/297] drm: tilcdc: simplify the recovery from sync lost
 error on rev1

Revision 2 of LCDC suffers from an issue where a SYNC_LOST error
caused by limited memory bandwidth may leave the picture shifted a
couple pixels to the right.

This issue has not been observed on revision 1, while the recovery
mechanism introduces a different issue, where the END_OF_FRAME
interrupt doesn't fire while drm is waiting for vblanks.

On rev1: recover from sync lost errors by simply clearing the
RASTER_ENABLE bit in the RASTER_CTRL register and re-enabling it
again as is suggested by the datasheet.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Jyri Sarha <jsarha@ti.com>
Signed-off-by: Jyri Sarha <jsarha@ti.com>
---
 drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 9942b0577d6e..20041073e46d 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -856,7 +856,7 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc)
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct tilcdc_drm_private *priv = dev->dev_private;
-	uint32_t stat;
+	uint32_t stat, reg;
 
 	stat = tilcdc_read_irqstatus(dev);
 	tilcdc_clear_irqstatus(dev, stat);
@@ -921,17 +921,26 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc)
 		dev_err_ratelimited(dev->dev, "%s(0x%08x): Sync lost",
 				    __func__, stat);
 		tilcdc_crtc->frame_intact = false;
-		if (tilcdc_crtc->sync_lost_count++ >
-		    SYNC_LOST_COUNT_LIMIT) {
-			dev_err(dev->dev, "%s(0x%08x): Sync lost flood detected, recovering", __func__, stat);
-			queue_work(system_wq, &tilcdc_crtc->recover_work);
-			if (priv->rev == 1)
+		if (priv->rev == 1) {
+			reg = tilcdc_read(dev, LCDC_RASTER_CTRL_REG);
+			if (reg & LCDC_RASTER_ENABLE) {
 				tilcdc_clear(dev, LCDC_RASTER_CTRL_REG,
-					     LCDC_V1_SYNC_LOST_INT_ENA);
-			else
+					     LCDC_RASTER_ENABLE);
+				tilcdc_set(dev, LCDC_RASTER_CTRL_REG,
+					   LCDC_RASTER_ENABLE);
+			}
+		} else {
+			if (tilcdc_crtc->sync_lost_count++ >
+			    SYNC_LOST_COUNT_LIMIT) {
+				dev_err(dev->dev,
+					"%s(0x%08x): Sync lost flood detected, recovering",
+					__func__, stat);
+				queue_work(system_wq,
+					   &tilcdc_crtc->recover_work);
 				tilcdc_write(dev, LCDC_INT_ENABLE_CLR_REG,
 					     LCDC_SYNC_LOST);
-			tilcdc_crtc->sync_lost_count = 0;
+				tilcdc_crtc->sync_lost_count = 0;
+			}
 		}
 	}
 

From aebe55c2d4b998741c0847ace1b4af47d73c763b Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 3 Jan 2017 01:14:27 +0200
Subject: [PATCH 068/297] drm: Clean up planes in atomic commit helper failure
 path

If waiting for fences fails for blocking commits, planes must be cleaned
up before returning.

Cc: stable@vger.kernel.org
Fixes: f6ce410a59a4 ("drm/fence: allow fence waiting to be interrupted by userspace")
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170102231427.7192-1-laurent.pinchart@ideasonboard.com
---
 drivers/gpu/drm/drm_atomic_helper.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 583f47f27b36..34f757bcabae 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1259,8 +1259,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 
 	if (!nonblock) {
 		ret = drm_atomic_helper_wait_for_fences(dev, state, true);
-		if (ret)
+		if (ret) {
+			drm_atomic_helper_cleanup_planes(dev, state);
 			return ret;
+		}
 	}
 
 	/*

From 0c931a290cc0377c99a8cd970a49e736dbb23e0e Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 2 Jan 2017 16:14:15 +0100
Subject: [PATCH 069/297] drm/meson: Fix CVBS initialization when HDMI is
 configured by bootloader

When the HDMI output is configured by the bootloader, there is mismatch is the
pipeline configuration and the Vsync interrupt fails to trigger.

This commit disables the HDMI blocks in the probe phase.

Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller")
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
---
 drivers/gpu/drm/meson/meson_venc.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c
index d836b2274531..f7c870172220 100644
--- a/drivers/gpu/drm/meson/meson_venc.c
+++ b/drivers/gpu/drm/meson/meson_venc.c
@@ -38,6 +38,11 @@
  * - TV Panel encoding via ENCT
  */
 
+/* HHI Registers */
+#define HHI_VDAC_CNTL0		0x2F4 /* 0xbd offset in data sheet */
+#define HHI_VDAC_CNTL1		0x2F8 /* 0xbe offset in data sheet */
+#define HHI_HDMI_PHY_CNTL0	0x3a0 /* 0xe8 offset in data sheet */
+
 struct meson_cvbs_enci_mode meson_cvbs_enci_pal = {
 	.mode_tag = MESON_VENC_MODE_CVBS_PAL,
 	.hso_begin = 3,
@@ -242,6 +247,20 @@ void meson_venc_disable_vsync(struct meson_drm *priv)
 
 void meson_venc_init(struct meson_drm *priv)
 {
+	/* Disable CVBS VDAC */
+	regmap_write(priv->hhi, HHI_VDAC_CNTL0, 0);
+	regmap_write(priv->hhi, HHI_VDAC_CNTL1, 8);
+
+	/* Power Down Dacs */
+	writel_relaxed(0xff, priv->io_base + _REG(VENC_VDAC_SETTING));
+
+	/* Disable HDMI PHY */
+	regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0);
+
+	/* Disable HDMI */
+	writel_bits_relaxed(0x3, 0,
+			    priv->io_base + _REG(VPU_HDMI_SETTING));
+
 	/* Disable all encoders */
 	writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_EN));
 	writel_relaxed(0, priv->io_base + _REG(ENCP_VIDEO_EN));

From 5db60ea93d4fbf146c8f7ca286b8b2a091761460 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 4 Jan 2017 10:51:02 +0100
Subject: [PATCH 070/297] drm/meson: Fix CVBS VDAC disable

This commit fixes the VDAC disabling register write values.

Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller")
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
---
 drivers/gpu/drm/meson/meson_venc_cvbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/meson/meson_venc_cvbs.c b/drivers/gpu/drm/meson/meson_venc_cvbs.c
index c809c085fd78..a2bcc70a03ef 100644
--- a/drivers/gpu/drm/meson/meson_venc_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_venc_cvbs.c
@@ -167,7 +167,7 @@ static void meson_venc_cvbs_encoder_disable(struct drm_encoder *encoder)
 
 	/* Disable CVBS VDAC */
 	regmap_write(priv->hhi, HHI_VDAC_CNTL0, 0);
-	regmap_write(priv->hhi, HHI_VDAC_CNTL1, 0);
+	regmap_write(priv->hhi, HHI_VDAC_CNTL1, 8);
 }
 
 static void meson_venc_cvbs_encoder_enable(struct drm_encoder *encoder)

From eebc509b20881b92d62e317b2c073e57c5f200f0 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 4 Jan 2017 12:29:05 +0900
Subject: [PATCH 071/297] perf probe: Fix --funcs to show correct symbols for
 offline module

Fix --funcs (-F) option to show correct symbols for offline module.
Since previous perf-probe uses machine__findnew_module_map() for offline
module, even if user passes a module file (with full path) which is for
other architecture, perf-probe always tries to load symbol map for
current kernel module.

This fix uses dso__new_map() to load the map from given binary as same
as a map for user applications.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/148350053478.19001.15435255244512631545.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8f810961ec78..542e6472c4d7 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module)
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
-		return machine__findnew_module_map(host_machine, 0, module);
+		return dso__new_map(module);
 
 	if (!module)
 		module = "kernel";
@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module)
 		if (strncmp(pos->dso->short_name + 1, module,
 			    pos->dso->short_name_len - 2) == 0 &&
 		    module[pos->dso->short_name_len - 2] == '\0') {
+			map__get(pos);
 			return pos;
 		}
 	}
@@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user)
 		return kernel_get_module_map(target);
 }
 
-static void put_target_map(struct map *map, bool user)
-{
-	if (map && user) {
-		/* Only the user map needs to be released */
-		map__put(map);
-	}
-}
-
-
 static int convert_exec_to_group(const char *exec, char **result)
 {
 	char *ptr1, *ptr2, *exec_copy;
@@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
 	}
 
 out:
-	put_target_map(map, uprobes);
+	map__put(map);
 	return ret;
 
 }
@@ -2869,7 +2861,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	}
 
 out:
-	put_target_map(map, pev->uprobes);
+	map__put(map);
 	free(syms);
 	return ret;
 
@@ -3362,10 +3354,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
 		return ret;
 
 	/* Get a symbol map */
-	if (user)
-		map = dso__new_map(target);
-	else
-		map = kernel_get_module_map(target);
+	map = get_target_map(target, user);
 	if (!map) {
 		pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
 		return -EINVAL;
@@ -3397,9 +3386,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
         }
 
 end:
-	if (user) {
-		map__put(map);
-	}
+	map__put(map);
 	exit_probe_symbol_maps();
 
 	return ret;

From 8a937a25a7e3c19d5fb3f9d92f605cf5fda219d8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 4 Jan 2017 12:30:19 +0900
Subject: [PATCH 072/297] perf probe: Fix to probe on gcc generated symbols for
 offline kernel

Fix perf-probe to show probe definition on gcc generated symbols for
offline kernel (including cross-arch kernel image).

gcc sometimes optimizes functions and generate new symbols with suffixes
such as ".constprop.N" or ".isra.N" etc. Since those symbol names are
not recorded in DWARF, we have to find correct generated symbols from
offline ELF binary to probe on it (kallsyms doesn't correct it).  For
online kernel or uprobes we don't need it because those are rebased on
_text, or a section relative address.

E.g. Without this:

  $ perf probe -k build-arm/vmlinux -F __slab_alloc*
  __slab_alloc.constprop.9
  $ perf probe -k build-arm/vmlinux -D __slab_alloc
  p:probe/__slab_alloc __slab_alloc+0

If you put above definition on target machine, it should fail
because there is no __slab_alloc in kallsyms.

With this fix, perf probe shows correct probe definition on
__slab_alloc.constprop.9:

  $ perf probe -k build-arm/vmlinux -D __slab_alloc
  p:probe/__slab_alloc __slab_alloc.constprop.9+0

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/148350060434.19001.11864836288580083501.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 48 ++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 542e6472c4d7..4a57c8a60bd9 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -610,6 +610,51 @@ error:
 	return ret ? : -ENOENT;
 }
 
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generate new symbols with suffixes such as .constprop.N or .isra.N
+ * etc. Since those symbols are not recorded in DWARF, we have to find
+ * correct generated symbols from offline ELF binary.
+ * For online kernel or uprobes we don't need this because those are
+ * rebased on _text, or already a section relative address.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+					int ntevs, const char *pathname)
+{
+	struct symbol *sym;
+	struct map *map;
+	unsigned long stext = 0;
+	u64 addr;
+	int i;
+
+	/* Prepare a map for offline binary */
+	map = dso__new_map(pathname);
+	if (!map || get_text_start_address(pathname, &stext) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", pathname);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ntevs; i++) {
+		addr = tevs[i].point.address + tevs[i].point.offset - stext;
+		sym = map__find_symbol(map, addr);
+		if (!sym)
+			continue;
+		if (!strcmp(sym->name, tevs[i].point.symbol))
+			continue;
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = strdup(sym->name);
+		tevs[i].point.offset = addr - sym->start;
+	}
+	map__put(map);
+
+	return 0;
+}
+
 static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
 					  int ntevs, const char *exec)
 {
@@ -671,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
 
 	/* Skip post process if the target is an offline kernel */
 	if (symbol_conf.ignore_vmlinux_buildid)
-		return 0;
+		return post_process_offline_probe_trace_events(tevs, ntevs,
+						symbol_conf.vmlinux_name);
 
 	reloc_sym = kernel_get_ref_reloc_sym();
 	if (!reloc_sym) {

From 4ee437fbf626b5ad756889d8bc0fcead3d66dde7 Mon Sep 17 00:00:00 2001
From: Caleb Crome <caleb@crome.org>
Date: Tue, 3 Jan 2017 10:22:57 -0800
Subject: [PATCH 073/297] ASoC: fsl_ssi: set fifo watermark to more reliable
 value

The fsl_ssi fifo watermark is by default set to 2 free spaces (i.e.
activate DMA on FIFO when only 2 spaces are left.)  This means the
DMA must service the fifo within 2 audio samples, which is just not
enough time  for many use cases with high data rate.  In many
configurations the audio channel slips (causing l/r swap in stereo
configurations, or channel slipping in multi-channel configurations).

This patch gives more breathing room and allows the SSI to operate
reliably by changing the fifio refill watermark to 8.

There is no change in behavior for older chips (with an 8-deep fifo).
Only the newer chips with a 15-deep fifo get the new behavior. I
suspect a new fifo depth setting could be optimized on the older
chips too, but I have not tested.

Signed-off-by: Caleb Crome <caleb@crome.org>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/fsl_ssi.c | 74 +++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 21 deletions(-)

diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 50349437d961..fde08660b63b 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -224,6 +224,12 @@ struct fsl_ssi_soc_data {
  * @dbg_stats: Debugging statistics
  *
  * @soc: SoC specific data
+ *
+ * @fifo_watermark: the FIFO watermark setting.  Notifies DMA when
+ *             there are @fifo_watermark or fewer words in TX fifo or
+ *             @fifo_watermark or more empty words in RX fifo.
+ * @dma_maxburst: max number of words to transfer in one go.  So far,
+ *             this is always the same as fifo_watermark.
  */
 struct fsl_ssi_private {
 	struct regmap *regs;
@@ -263,6 +269,9 @@ struct fsl_ssi_private {
 
 	const struct fsl_ssi_soc_data *soc;
 	struct device *dev;
+
+	u32 fifo_watermark;
+	u32 dma_maxburst;
 };
 
 /*
@@ -1051,21 +1060,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev,
 	regmap_write(regs, CCSR_SSI_SRCR, srcr);
 	regmap_write(regs, CCSR_SSI_SCR, scr);
 
-	/*
-	 * Set the watermark for transmit FIFI 0 and receive FIFO 0. We don't
-	 * use FIFO 1. We program the transmit water to signal a DMA transfer
-	 * if there are only two (or fewer) elements left in the FIFO. Two
-	 * elements equals one frame (left channel, right channel). This value,
-	 * however, depends on the depth of the transmit buffer.
-	 *
-	 * We set the watermark on the same level as the DMA burstsize.  For
-	 * fiq it is probably better to use the biggest possible watermark
-	 * size.
-	 */
-	if (ssi_private->use_dma)
-		wm = ssi_private->fifo_depth - 2;
-	else
-		wm = ssi_private->fifo_depth;
+	wm = ssi_private->fifo_watermark;
 
 	regmap_write(regs, CCSR_SSI_SFCSR,
 			CCSR_SSI_SFCSR_TFWM0(wm) | CCSR_SSI_SFCSR_RFWM0(wm) |
@@ -1373,12 +1368,8 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev,
 		dev_dbg(&pdev->dev, "could not get baud clock: %ld\n",
 			 PTR_ERR(ssi_private->baudclk));
 
-	/*
-	 * We have burstsize be "fifo_depth - 2" to match the SSI
-	 * watermark setting in fsl_ssi_startup().
-	 */
-	ssi_private->dma_params_tx.maxburst = ssi_private->fifo_depth - 2;
-	ssi_private->dma_params_rx.maxburst = ssi_private->fifo_depth - 2;
+	ssi_private->dma_params_tx.maxburst = ssi_private->dma_maxburst;
+	ssi_private->dma_params_rx.maxburst = ssi_private->dma_maxburst;
 	ssi_private->dma_params_tx.addr = ssi_private->ssi_phys + CCSR_SSI_STX0;
 	ssi_private->dma_params_rx.addr = ssi_private->ssi_phys + CCSR_SSI_SRX0;
 
@@ -1543,6 +1534,47 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                 /* Older 8610 DTs didn't have the fifo-depth property */
 		ssi_private->fifo_depth = 8;
 
+	/*
+	 * Set the watermark for transmit FIFO 0 and receive FIFO 0. We don't
+	 * use FIFO 1 but set the watermark appropriately nontheless.
+	 * We program the transmit water to signal a DMA transfer
+	 * if there are N elements left in the FIFO. For chips with 15-deep
+	 * FIFOs, set watermark to 8.  This allows the SSI to operate at a
+	 * high data rate without channel slipping. Behavior is unchanged
+	 * for the older chips with a fifo depth of only 8.  A value of 4
+	 * might be appropriate for the older chips, but is left at
+	 * fifo_depth-2 until sombody has a chance to test.
+	 *
+	 * We set the watermark on the same level as the DMA burstsize.  For
+	 * fiq it is probably better to use the biggest possible watermark
+	 * size.
+	 */
+	switch (ssi_private->fifo_depth) {
+	case 15:
+		/*
+		 * 2 samples is not enough when running at high data
+		 * rates (like 48kHz @ 16 bits/channel, 16 channels)
+		 * 8 seems to split things evenly and leave enough time
+		 * for the DMA to fill the FIFO before it's over/under
+		 * run.
+		 */
+		ssi_private->fifo_watermark = 8;
+		ssi_private->dma_maxburst = 8;
+		break;
+	case 8:
+	default:
+		/*
+		 * maintain old behavior for older chips.
+		 * Keeping it the same because I don't have an older
+		 * board to test with.
+		 * I suspect this could be changed to be something to
+		 * leave some more space in the fifo.
+		 */
+		ssi_private->fifo_watermark = ssi_private->fifo_depth - 2;
+		ssi_private->dma_maxburst = ssi_private->fifo_depth - 2;
+		break;
+	}
+
 	dev_set_drvdata(&pdev->dev, ssi_private);
 
 	if (ssi_private->soc->imx) {

From dd853fd216d1485ed3045ff772079cc8689a9a4a Mon Sep 17 00:00:00 2001
From: Lukasz Odzioba <lukasz.odzioba@intel.com>
Date: Wed, 28 Dec 2016 14:55:40 +0100
Subject: [PATCH 074/297] x86/cpu: Fix bootup crashes by sanitizing the
 argument of the 'clearcpuid=' command-line option

A negative number can be specified in the cmdline which will be used as
setup_clear_cpu_cap() argument. With that we can clear/set some bit in
memory predceeding boot_cpu_data/cpu_caps_cleared which may cause kernel
to misbehave. This patch adds lower bound check to setup_disablecpuid().

Boris Petkov reproduced a crash:

  [    1.234575] BUG: unable to handle kernel paging request at ffffffff858bd540
  [    1.236535] IP: memcpy_erms+0x6/0x10

Signed-off-by: Lukasz Odzioba <lukasz.odzioba@intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: andi.kleen@intel.com
Cc: bp@alien8.de
Cc: dave.hansen@linux.intel.com
Cc: luto@kernel.org
Cc: slaoub@gmail.com
Fixes: ac72e7888a61 ("x86: add generic clearcpuid=... option")
Link: http://lkml.kernel.org/r/1482933340-11857-1-git-send-email-lukasz.odzioba@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dc1697ca5191..9bab7a8a4293 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1221,7 +1221,7 @@ static __init int setup_disablecpuid(char *arg)
 {
 	int bit;
 
-	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+	if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
 		setup_clear_cpu_cap(bit);
 	else
 		return 0;

From 754c73cf4d2463022b2c9ae208026bf22564ed06 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 2 Jan 2017 11:22:29 +0200
Subject: [PATCH 075/297] x86/cpu: Fix typo in the comment for Anniedale

The proper spelling of Anniedale SoC with 'e' in the middle. Fix typo in the
comment line in intel-family.h header.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170102092229.87036-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/intel-family.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 34a46dc076d3..8167fdb67ae8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -57,7 +57,7 @@
 #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
 #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
 #define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
 

From 159d3726db12b3476bc59ea0ab0a702103d466b5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 2 Jan 2017 11:24:50 +0200
Subject: [PATCH 076/297] x86/platform/intel-mid: Rename 'spidev' to
 'mrfld_spidev'

The current implementation supports only Intel Merrifield platforms. Don't mess
with the rest of the Intel MID family by not registering device with wrong
properties.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170102092450.87229-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-mid/device_libs/Makefile              | 2 +-
 .../{platform_spidev.c => platform_mrfld_spidev.c}            | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)
 rename arch/x86/platform/intel-mid/device_libs/{platform_spidev.c => platform_mrfld_spidev.c} (91%)

diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 61b5ed2b7d40..90e4f2a6625b 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
 obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o
 obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
 # SPI Devices
-obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o
+obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o
 # I2C Devices
 obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
 obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
similarity index 91%
rename from arch/x86/platform/intel-mid/device_libs/platform_spidev.c
rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
index 30c601b399ee..27186ad654c9 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
@@ -11,6 +11,7 @@
  * of the License.
  */
 
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/sfi.h>
 #include <linux/spi/pxa2xx_spi.h>
@@ -34,6 +35,9 @@ static void __init *spidev_platform_data(void *info)
 {
 	struct spi_board_info *spi_info = info;
 
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return ERR_PTR(-ENODEV);
+
 	spi_info->mode = SPI_MODE_0;
 	spi_info->controller_data = &spidev_spi_chip;
 

From 74545f63890e38520eb4d1dbedcadaa9c0dbc824 Mon Sep 17 00:00:00 2001
From: David Carrillo-Cisneros <davidcc@google.com>
Date: Thu, 22 Dec 2016 17:17:40 -0800
Subject: [PATCH 077/297] perf/x86: Set pmu->module in Intel PMU modules

The conversion of Intel PMU drivers into modules did not include reference
counting. The machine will crash when attempting to  access deleted code
if an event from a module PMU is started and the module removed before the
event is destroyed.

i.e. this crashes the machine:

	$ insmod intel-rapl-perf.ko
	$ perf stat -e power/energy-cores/ -C 0 &
	$ rmmod intel-rapl-perf.ko

Set THIS_MODULE to pmu->module in Intel module PMUs so that generic code
can handle reference counting and deny rmmod while an event still exists.

Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1482455860-116269-1-git-send-email-davidcc@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/cstate.c | 2 ++
 arch/x86/events/intel/rapl.c   | 1 +
 arch/x86/events/intel/uncore.c | 1 +
 3 files changed, 4 insertions(+)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index fec8a461bdef..1076c9a77292 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static struct pmu cstate_pkg_pmu = {
@@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static const struct cstate_model nhm_cstates __initconst = {
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index bd34124449b0..17c3564d087a 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void)
 	rapl_pmus->pmu.start		= rapl_pmu_event_start;
 	rapl_pmus->pmu.stop		= rapl_pmu_event_stop;
 	rapl_pmus->pmu.read		= rapl_pmu_event_read;
+	rapl_pmus->pmu.module		= THIS_MODULE;
 	return 0;
 }
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 97c246f84dea..8c4ccdc3a3f3 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
 			.start		= uncore_pmu_event_start,
 			.stop		= uncore_pmu_event_stop,
 			.read		= uncore_pmu_event_read,
+			.module		= THIS_MODULE,
 		};
 	} else {
 		pmu->pmu = *pmu->type->pmu;

From 753aacfd2e95df6a0caf23c03dc309020765bea9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 5 Jan 2017 10:57:14 +0100
Subject: [PATCH 078/297] nl80211: fix sched scan netlink socket owner
 destruction

A single netlink socket might own multiple interfaces *and* a
scheduled scan request (which might belong to another interface),
so when it goes away both may need to be destroyed.

Remove the schedule_scan_stop indirection to fix this - it's only
needed for interface destruction because of the way this works
right now, with a single work taking care of all interfaces.

Cc: stable@vger.kernel.org
Fixes: 93a1e86ce10e4 ("nl80211: Stop scheduled scan if netlink client disappears")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3df85a751a85..ef5eff93a8b8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14502,13 +14502,17 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
 		bool schedule_destroy_work = false;
-		bool schedule_scan_stop = false;
 		struct cfg80211_sched_scan_request *sched_scan_req =
 			rcu_dereference(rdev->sched_scan_req);
 
 		if (sched_scan_req && notify->portid &&
-		    sched_scan_req->owner_nlportid == notify->portid)
-			schedule_scan_stop = true;
+		    sched_scan_req->owner_nlportid == notify->portid) {
+			sched_scan_req->owner_nlportid = 0;
+
+			if (rdev->ops->sched_scan_stop &&
+			    rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+				schedule_work(&rdev->sched_scan_stop_wk);
+		}
 
 		list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) {
 			cfg80211_mlme_unregister_socket(wdev, notify->portid);
@@ -14539,12 +14543,6 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 				spin_unlock(&rdev->destroy_list_lock);
 				schedule_work(&rdev->destroy_work);
 			}
-		} else if (schedule_scan_stop) {
-			sched_scan_req->owner_nlportid = 0;
-
-			if (rdev->ops->sched_scan_stop &&
-			    rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
-				schedule_work(&rdev->sched_scan_stop_wk);
 		}
 	}
 

From 60448b077ed93d227e6c117a9e87db76ff0c1911 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Wed, 4 Jan 2017 15:44:52 -0600
Subject: [PATCH 079/297] ASoC: Intel: bytcr-rt5640: fix settings in internal
 clock mode

Frequency value of zero did not make sense, use same 24.576MHz
setting and only change the clock source in idle mode

Suggested-by: Bard Liao <bardliao@realtek.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index e33e4777a65c..8d2fb2d6f532 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -142,7 +142,7 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w,
 		 * for Jack detection and button press
 		 */
 		ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_RCCLK,
-					     0,
+					     48000 * 512,
 					     SND_SOC_CLOCK_IN);
 		if (!ret) {
 			if ((byt_rt5640_quirk & BYT_RT5640_MCLK_EN) && priv->mclk)

From 08f9572671c8047e7234cbf150869aa3c3d59a97 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 5 Jan 2017 14:25:59 +0100
Subject: [PATCH 080/297] HID: ignore Petzl USB headlamp

This headlamp contains a dummy HID descriptor which pretends to be
a mouse-like device, but can't be used as a mouse at all.

Reported-by: Lukas Ocilka <lukas.ocilka@suse.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 1 +
 drivers/hid/hid-ids.h  | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index cff060b56da9..ea36b557d5ee 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2496,6 +2496,7 @@ static const struct hid_device_id hid_ignore_list[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PETZL, USB_DEVICE_ID_PETZL_HEADLAMP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PHILIPS, USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) },
 #if IS_ENABLED(CONFIG_MOUSE_SYNAPTICS_USB)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 54bd22dc1411..f46f2c5117fa 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -816,6 +816,9 @@
 #define USB_VENDOR_ID_PETALYNX		0x18b1
 #define USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE	0x0037
 
+#define USB_VENDOR_ID_PETZL		0x2122
+#define USB_DEVICE_ID_PETZL_HEADLAMP	0x1234
+
 #define USB_VENDOR_ID_PHILIPS		0x0471
 #define USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE 0x0617
 

From 9b60047a9c950e3fde186466774ffd1ab1104d4e Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Thu, 5 Jan 2017 02:54:27 -0500
Subject: [PATCH 081/297] r8169: fix the typo in the comment

>From the realtek data sheet, the PID0 should be bit 0.

Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 44389c90056a..8f1623bf2134 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -696,7 +696,7 @@ enum rtl_tx_desc_bit_1 {
 enum rtl_rx_desc_bit {
 	/* Rx private */
 	PID1		= (1 << 18), /* Protocol ID bit 1/2 */
-	PID0		= (1 << 17), /* Protocol ID bit 2/2 */
+	PID0		= (1 << 17), /* Protocol ID bit 0/2 */
 
 #define RxProtoUDP	(PID1)
 #define RxProtoTCP	(PID0)

From 28ca833ecf89c585a9543fb21aef6b2bdbbaa48a Mon Sep 17 00:00:00 2001
From: JackieLiu <liuyun01@kylinos.cn>
Date: Tue, 13 Dec 2016 13:55:27 +0800
Subject: [PATCH 082/297] md/raid5-cache: removes unnecessary write-through
 mode judgments

The write-through mode has been returned in front of the function,
do not need to do it again.

Signed-off-by: JackieLiu <liuyun01@kylinos.cn>
Reviewed-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index d7bfb6fc8aef..49aea4231084 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2418,9 +2418,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	if (do_wakeup)
 		wake_up(&conf->wait_for_overlap);
 
-	if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
-		return;
-
 	spin_lock_irq(&conf->log->stripe_in_journal_lock);
 	list_del_init(&sh->r5c);
 	spin_unlock_irq(&conf->log->stripe_in_journal_lock);

From 3c66abbaaf69671dfd3eb9fa7740b5d7ec688231 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 14 Dec 2016 15:38:01 -0800
Subject: [PATCH 083/297] md/r5cache: simplify handling of sh->log_start in
 recovery

We only need to update sh->log_start at the end of recovery,
which is r5c_recovery_rewrite_data_only_stripes(), so it is not
necessary to set it before that. In this patch, log_start is
removed from r5c_recovery_alloc_stripe().

After updating all sh->log_start, rewrite_data_only_stripes()
also updates log->next_checkpoints to the last sh->log_start.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 49aea4231084..b178a8fef266 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1682,8 +1682,7 @@ out:
 
 static struct stripe_head *
 r5c_recovery_alloc_stripe(struct r5conf *conf,
-			  sector_t stripe_sect,
-			  sector_t log_start)
+			  sector_t stripe_sect)
 {
 	struct stripe_head *sh;
 
@@ -1692,7 +1691,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf,
 		return NULL;  /* no more stripe available */
 
 	r5l_recovery_reset_stripe(sh);
-	sh->log_start = log_start;
 
 	return sh;
 }
@@ -1862,7 +1860,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 						stripe_sect);
 
 		if (!sh) {
-			sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos);
+			sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
 			/*
 			 * cannot get stripe from raid5_get_active_stripe
 			 * try replay some stripes
@@ -1871,7 +1869,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 				r5c_recovery_replay_stripes(
 					cached_stripe_list, ctx);
 				sh = r5c_recovery_alloc_stripe(
-					conf, stripe_sect, ctx->pos);
+					conf, stripe_sect);
 			}
 			if (!sh) {
 				pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
@@ -1879,8 +1877,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 					conf->min_nr_stripes * 2);
 				raid5_set_cache_size(mddev,
 						     conf->min_nr_stripes * 2);
-				sh = r5c_recovery_alloc_stripe(
-					conf, stripe_sect, ctx->pos);
+				sh = r5c_recovery_alloc_stripe(conf,
+							       stripe_sect);
 			}
 			if (!sh) {
 				pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
@@ -1894,7 +1892,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
 			if (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
 			    test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) {
 				r5l_recovery_replay_one_stripe(conf, sh, ctx);
-				sh->log_start = ctx->pos;
 				list_move_tail(&sh->lru, cached_stripe_list);
 			}
 			r5l_recovery_load_data(log, sh, ctx, payload,
@@ -1933,8 +1930,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log,
 			set_bit(R5_UPTODATE, &dev->flags);
 		}
 	}
-	list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
-	atomic_inc(&log->stripe_in_journal_count);
 }
 
 /*
@@ -2070,6 +2065,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 	struct stripe_head *sh, *next;
 	struct mddev *mddev = log->rdev->mddev;
 	struct page *page;
+	sector_t next_checkpoint = MaxSector;
 
 	page = alloc_page(GFP_KERNEL);
 	if (!page) {
@@ -2078,6 +2074,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		return -ENOMEM;
 	}
 
+	WARN_ON(list_empty(&ctx->cached_list));
+
 	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
 		struct r5l_meta_block *mb;
 		int i;
@@ -2123,12 +2121,15 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
 			     REQ_OP_WRITE, REQ_FUA, false);
 		sh->log_start = ctx->pos;
+		list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
+		atomic_inc(&log->stripe_in_journal_count);
 		ctx->pos = write_pos;
 		ctx->seq += 1;
-
+		next_checkpoint = sh->log_start;
 		list_del_init(&sh->lru);
 		raid5_release_stripe(sh);
 	}
+	log->next_checkpoint = next_checkpoint;
 	__free_page(page);
 	return 0;
 }
@@ -2139,7 +2140,6 @@ static int r5l_recovery_log(struct r5l_log *log)
 	struct r5l_recovery_ctx ctx;
 	int ret;
 	sector_t pos;
-	struct stripe_head *sh;
 
 	ctx.pos = log->last_checkpoint;
 	ctx.seq = log->last_cp_seq;
@@ -2164,9 +2164,6 @@ static int r5l_recovery_log(struct r5l_log *log)
 		log->next_checkpoint = ctx.pos;
 		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
 		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-	} else {
-		sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
-		log->next_checkpoint = sh->log_start;
 	}
 
 	if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))

From d2250f105f18a43fdab17421bd80b0ffc9fcc53f Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 14 Dec 2016 15:38:02 -0800
Subject: [PATCH 084/297] md/r5cache: assign conf->log before r5l_load_log()

r5l_load_log() calls functions that requires a proper conf->log,
for example, r5c_is_writeback(). Therefore, we should set
conf->log before calling r5l_load_log(). If r5l_load_log() fails,
conf->log is set back to NULL.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index b178a8fef266..bff1b4a949e8 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2633,14 +2633,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->stripe_in_journal_lock);
 	atomic_set(&log->stripe_in_journal_count, 0);
 
+	rcu_assign_pointer(conf->log, log);
+
 	if (r5l_load_log(log))
 		goto error;
 
-	rcu_assign_pointer(conf->log, log);
 	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
 	return 0;
 
 error:
+	rcu_assign_pointer(conf->log, NULL);
 	md_unregister_thread(&log->reclaim_thread);
 reclaim_thread:
 	mempool_destroy(log->meta_pool);

From 99f17890f04cff0262de7393c60a2f6d9c9c7e71 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Dec 2016 00:52:30 +0000
Subject: [PATCH 085/297] md/r5cache: fix spelling mistake on "recoverying"

Trivial fix to spelling mistake "recoverying" to "recovering" in
pr_dbg message.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index bff1b4a949e8..0e8ed2c327b0 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2170,7 +2170,7 @@ static int r5l_recovery_log(struct r5l_log *log)
 		pr_debug("md/raid:%s: starting from clean shutdown\n",
 			 mdname(mddev));
 	else {
-		pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
+		pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
 			 mdname(mddev), ctx.data_only_stripes,
 			 ctx.data_parity_stripes);
 

From 394ed8e4743b0cfc5496fe49059fbfc2bc8eae35 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 4 Jan 2017 16:10:19 -0800
Subject: [PATCH 086/297] md: cleanup mddev flag clear for takeover

Commit 6995f0b (md: takeover should clear unrelated bits) clear
unrelated bits, but it's quite fragile. To avoid error in the future,
define a macro for unsupported mddev flags for each raid type and use it
to clear unsupported mddev flags. This should be less error-prone.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.h    |  8 ++++++++
 drivers/md/raid0.c | 12 ++++++++----
 drivers/md/raid1.c |  8 ++++++--
 drivers/md/raid5.c |  5 ++++-
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/md/md.h b/drivers/md/md.h
index e38936d05df1..2a514036a83d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 				int is_new);
 struct md_cluster_info;
 
+/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
 enum mddev_flags {
 	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
 	MD_CLOSING,		/* If set, we are closing the array, do not open
@@ -702,4 +703,11 @@ static inline int mddev_is_clustered(struct mddev *mddev)
 {
 	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
 }
+
+/* clear unsupported mddev_flags */
+static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
+	unsigned long unsupported_flags)
+{
+	mddev->flags &= ~unsupported_flags;
+}
 #endif /* _MD_MD_H */
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a162fedeb51a..848365d474f3 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -26,6 +26,11 @@
 #include "raid0.h"
 #include "raid5.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS		\
+	((1L << MD_HAS_JOURNAL) |	\
+	 (1L << MD_JOURNAL_CLEAN) |	\
+	 (1L << MD_FAILFAST_SUPPORTED))
+
 static int raid0_congested(struct mddev *mddev, int bits)
 {
 	struct r0conf *conf = mddev->private;
@@ -539,8 +544,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
 	mddev->delta_disks = -1;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-	clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 
@@ -583,7 +587,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
 	mddev->degraded = 0;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 	return priv_conf;
@@ -626,7 +630,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
 	mddev->raid_disks = 1;
 	/* make sure it will be not marked as dirty */
 	mddev->recovery_cp = MaxSector;
-	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
 
 	create_strip_zones(mddev, &priv_conf);
 	return priv_conf;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 14422407e520..7b0f647bcccb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -42,6 +42,10 @@
 #include "raid1.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS		\
+	((1L << MD_HAS_JOURNAL) |	\
+	 (1L << MD_JOURNAL_CLEAN))
+
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
  */
@@ -3257,8 +3261,8 @@ static void *raid1_takeover(struct mddev *mddev)
 		if (!IS_ERR(conf)) {
 			/* Array must appear to be quiesced */
 			conf->array_frozen = 1;
-			clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-			clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
+			mddev_clear_unsupported_flags(mddev,
+				UNSUPPORTED_MDDEV_FLAGS);
 		}
 		return conf;
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 06d7279bdd04..7b1da6e95a56 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -62,6 +62,8 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define UNSUPPORTED_MDDEV_FLAGS	(1L << MD_FAILFAST_SUPPORTED)
+
 #define cpu_to_group(cpu) cpu_to_node(cpu)
 #define ANY_GROUP NUMA_NO_NODE
 
@@ -7830,7 +7832,8 @@ static void *raid5_takeover_raid1(struct mddev *mddev)
 
 	ret = setup_conf(mddev);
 	if (!IS_ERR_VALUE(ret))
-		clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+		mddev_clear_unsupported_flags(mddev,
+			UNSUPPORTED_MDDEV_FLAGS);
 	return ret;
 }
 

From a2b1e8a20c992b01eeb76de00d4f534cbe9f3822 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Wed, 14 Dec 2016 11:59:34 +0100
Subject: [PATCH 087/297] selftests: do not require bash for the generated test

Nothing in this minimal script seems to require bash. We often run these
tests on embedded devices where the only shell available is the busybox
ash. Use sh instead.

Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 71b05891a6a1..831022b12848 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -90,7 +90,7 @@ ifdef INSTALL_PATH
 	done;
 
 	@# Ask all targets to emit their test scripts
-	echo "#!/bin/bash" > $(ALL_SCRIPT)
+	echo "#!/bin/sh" > $(ALL_SCRIPT)
 	echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT)
 	echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
 

From d979e13a3fa9067c8cd46e292ed859626d443996 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Wed, 14 Dec 2016 11:58:20 +0100
Subject: [PATCH 088/297] selftests: do not require bash to run bpf tests

Nothing in this minimal script seems to require bash. We often run these
tests on embedded devices where the only shell available is the busybox
ash. Use sh instead.

Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Cc: stable@vger.kernel.org
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/bpf/test_kmod.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 92e627adf354..6d58cca8e235 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 
 SRC_TREE=../../../../
 

From 3659f98b5375d195f1870c3e508fe51e52206839 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Wed, 14 Dec 2016 11:59:57 +0100
Subject: [PATCH 089/297] selftests: do not require bash to run netsocktests
 testcase

Nothing in this minimal script seems to require bash. We often run these
tests on embedded devices where the only shell available is the busybox
ash. Use sh instead.

Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/run_netsocktests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
index c09a682df56a..16058bbea7a8 100755
--- a/tools/testing/selftests/net/run_netsocktests
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 
 echo "--------------------"
 echo "running socket test"

From 7738789fba09108a28a5fb4739595d9a0a2f85fe Mon Sep 17 00:00:00 2001
From: Colin King <colin.king@canonical.com>
Date: Tue, 27 Dec 2016 16:17:21 +0000
Subject: [PATCH 090/297] selftests: x86/pkeys: fix spelling mistake:
 "itertation" -> "iteration"

Fix spelling mistake in print test pass message.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/x86/protection_keys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index bdd58c78902e..df9e0a0cdf29 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -1367,7 +1367,7 @@ void run_tests_once(void)
 		tracing_off();
 		close_test_fds();
 
-		printf("test %2d PASSED (itertation %d)\n", test_nr, iteration_nr);
+		printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
 		dprintf1("======================\n\n");
 	}
 	iteration_nr++;

From 1c3415a06b1016a596bfe59e0cfee56c773aa958 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 5 Jan 2017 14:14:54 -0800
Subject: [PATCH 091/297] Input: elants_i2c - avoid divide by 0 errors on bad
 touchscreen data

The following crash may be seen if bad data is received from the
touchscreen.

[ 2189.425150] elants_i2c i2c-ELAN0001:00: unknown packet ff ff ff ff
[ 2189.430738] divide error: 0000 [#1] PREEMPT SMP
[ 2189.434679] gsmi: Log Shutdown Reason 0x03
[ 2189.434689] Modules linked in: ip6t_REJECT nf_reject_ipv6 rfcomm evdi
uinput uvcvideo cmac videobuf2_vmalloc videobuf2_memops snd_hda_codec_hdmi
i2c_dev videobuf2_core snd_soc_sst_cht_bsw_rt5645 snd_hda_intel
snd_intel_sst_acpi btusb btrtl btbcm btintel bluetooth snd_soc_sst_acpi
snd_hda_codec snd_intel_sst_core snd_hwdep snd_soc_sst_mfld_platform
snd_hda_core snd_soc_rt5645 memconsole_x86_legacy memconsole zram snd_soc_rl6231
fuse ip6table_filter iwlmvm iwlwifi iwl7000_mac80211 cfg80211 iio_trig_sysfs
joydev cros_ec_sensors cros_ec_sensors_core industrialio_triggered_buffer
kfifo_buf industrialio snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq
snd_seq_device ppp_async ppp_generic slhc tun
[ 2189.434866] CPU: 0 PID: 106 Comm: irq/184-ELAN000 Tainted: G        W
3.18.0-13101-g57e8190 #1
[ 2189.434883] Hardware name: GOOGLE Ultima, BIOS Google_Ultima.7287.131.43 07/20/2016
[ 2189.434898] task: ffff88017a0b6d80 ti: ffff88017a2bc000 task.ti: ffff88017a2bc000
[ 2189.434913] RIP: 0010:[<ffffffffbecc48d5>]  [<ffffffffbecc48d5>] elants_i2c_irq+0x190/0x200
[ 2189.434937] RSP: 0018:ffff88017a2bfd98  EFLAGS: 00010293
[ 2189.434948] RAX: 0000000000000000 RBX: ffff88017a967828 RCX: ffff88017a9678e8
[ 2189.434962] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000000
[ 2189.434975] RBP: ffff88017a2bfdd8 R08: 00000000000003e8 R09: 0000000000000000
[ 2189.434989] R10: 0000000000000000 R11: 000000000044a2bd R12: ffff88017a991800
[ 2189.435001] R13: ffffffffbe8a2a53 R14: ffff88017a0b6d80 R15: ffff88017a0b6d80
[ 2189.435011] FS:  0000000000000000(0000) GS:ffff88017fc00000(0000) knlGS:0000000000000000
[ 2189.435022] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 2189.435030] CR2: 00007f678d94b000 CR3: 000000003f41a000 CR4: 00000000001007f0
[ 2189.435039] Stack:
[ 2189.435044]  ffff88017a2bfda8 ffff88017a9678e8 646464647a2bfdd8 0000000006e09574
[ 2189.435060]  0000000000000000 ffff88017a088b80 ffff88017a921000 ffffffffbe8a2a53
[ 2189.435074]  ffff88017a2bfe08 ffffffffbe8a2a73 ffff88017a0b6d80 0000000006e09574
[ 2189.435089] Call Trace:
[ 2189.435101]  [<ffffffffbe8a2a53>] ? irq_thread_dtor+0xa9/0xa9
[ 2189.435112]  [<ffffffffbe8a2a73>] irq_thread_fn+0x20/0x40
[ 2189.435123]  [<ffffffffbe8a2be1>] irq_thread+0x14e/0x222
[ 2189.435135]  [<ffffffffbee8cbeb>] ? __schedule+0x3b3/0x57a
[ 2189.435145]  [<ffffffffbe8a29aa>] ? wake_threads_waitq+0x2d/0x2d
[ 2189.435156]  [<ffffffffbe8a2a93>] ? irq_thread_fn+0x40/0x40
[ 2189.435168]  [<ffffffffbe87c385>] kthread+0x10e/0x116
[ 2189.435178]  [<ffffffffbe87c277>] ? __kthread_parkme+0x67/0x67
[ 2189.435189]  [<ffffffffbee900ac>] ret_from_fork+0x7c/0xb0
[ 2189.435199]  [<ffffffffbe87c277>] ? __kthread_parkme+0x67/0x67
[ 2189.435208] Code: ff ff eb 73 0f b6 bb c1 00 00 00 83 ff 03 7e 13 49 8d 7c
24 20 ba 04 00 00 00 48 c7 c6 8a cd 21 bf eb 4d 0f b6 83 c2 00 00 00 99 <f7> ff
83 f8 37 75 15 48 6b f7 37 4c 8d a3 c4 00 00 00 4c 8d ac
[ 2189.435312] RIP  [<ffffffffbecc48d5>] elants_i2c_irq+0x190/0x200
[ 2189.435323]  RSP <ffff88017a2bfd98>
[ 2189.435350] ---[ end trace f4945345a75d96dd ]---
[ 2189.443841] Kernel panic - not syncing: Fatal exception
[ 2189.444307] Kernel Offset: 0x3d800000 from 0xffffffff81000000
	(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 2189.444519] gsmi: Log Shutdown Reason 0x02

The problem was seen with a 3.18 based kernel, but there is no reason
to believe that the upstream code is safe.

Fixes: 66aee90088da2 ("Input: add support for Elan eKTH I2C touchscreens")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/elants_i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index 02aec284deca..3e6003d32e56 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -914,9 +914,9 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev)
 
 		case QUEUE_HEADER_NORMAL:
 			report_count = ts->buf[FW_HDR_COUNT];
-			if (report_count > 3) {
+			if (report_count == 0 || report_count > 3) {
 				dev_err(&client->dev,
-					"too large report count: %*ph\n",
+					"bad report count: %*ph\n",
 					HEADER_SIZE, ts->buf);
 				break;
 			}

From 9698b6f473555a722bf81a3371998427d5d27bde Mon Sep 17 00:00:00 2001
From: Satish Kharat <satishkh@cisco.com>
Date: Wed, 14 Dec 2016 13:20:41 -0800
Subject: [PATCH 092/297] scsi: fnic: Avoid sending reset to firmware when
 another reset is in progress

This fix is to avoid calling fnic_fw_reset_handler through
fnic_host_reset when a finc reset is alreay in progress.

Signed-off-by: Satish Kharat <satishkh@cisco.com>
Signed-off-by: Sesidhar Baddela <sebaddel@cisco.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/fnic/fnic.h      |  1 +
 drivers/scsi/fnic/fnic_scsi.c | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 9ddc9200e0a4..9e4b7709043e 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -248,6 +248,7 @@ struct fnic {
 	struct completion *remove_wait; /* device remove thread blocks */
 
 	atomic_t in_flight;		/* io counter */
+	bool internal_reset_inprogress;
 	u32 _reserved;			/* fill hole */
 	unsigned long state_flags;	/* protected by host lock */
 	enum fnic_state state;
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 2544a37ece0a..adb3d5871e74 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2581,6 +2581,19 @@ int fnic_host_reset(struct scsi_cmnd *sc)
 	unsigned long wait_host_tmo;
 	struct Scsi_Host *shost = sc->device->host;
 	struct fc_lport *lp = shost_priv(shost);
+	struct fnic *fnic = lport_priv(lp);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	if (fnic->internal_reset_inprogress == 0) {
+		fnic->internal_reset_inprogress = 1;
+	} else {
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			"host reset in progress skipping another host reset\n");
+		return SUCCESS;
+	}
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 
 	/*
 	 * If fnic_reset is successful, wait for fabric login to complete
@@ -2601,6 +2614,9 @@ int fnic_host_reset(struct scsi_cmnd *sc)
 		}
 	}
 
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	fnic->internal_reset_inprogress = 0;
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 	return ret;
 }
 

From 0371adcdaca92912baaa3256ed13e058a016e62d Mon Sep 17 00:00:00 2001
From: Burak Ok <burak-kernel@bur0k.de>
Date: Wed, 21 Dec 2016 14:45:53 +0100
Subject: [PATCH 093/297] scsi: snic: Return error code on memory allocation
 failure

If a call to mempool_create_slab_pool() in snic_probe() returns NULL,
return -ENOMEM to indicate failure. mempool_creat_slab_pool() only fails
if it cannot allocate memory.

https://bugzilla.kernel.org/show_bug.cgi?id=189061

Reported-by: bianpan2010@ruc.edu.cn
Signed-off-by: Burak Ok <burak-kernel@bur0k.de>
Signed-off-by: Andreas Schaertl <andreas.schaertl@fau.de>
Acked-by: Narsimhulu Musini <nmusini@cisco.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/snic/snic_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c
index 396b32dca074..7cf70aaec0ba 100644
--- a/drivers/scsi/snic/snic_main.c
+++ b/drivers/scsi/snic/snic_main.c
@@ -591,6 +591,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!pool) {
 		SNIC_HOST_ERR(shost, "dflt sgl pool creation failed\n");
 
+		ret = -ENOMEM;
 		goto err_free_res;
 	}
 
@@ -601,6 +602,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!pool) {
 		SNIC_HOST_ERR(shost, "max sgl pool creation failed\n");
 
+		ret = -ENOMEM;
 		goto err_free_dflt_sgl_pool;
 	}
 
@@ -611,6 +613,7 @@ snic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!pool) {
 		SNIC_HOST_ERR(shost, "snic tmreq info pool creation failed.\n");
 
+		ret = -ENOMEM;
 		goto err_free_max_sgl_pool;
 	}
 

From 2d1148f0f45079d25a0fa0d67e4fdb2a656d12fb Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Fri, 23 Dec 2016 20:40:19 -0800
Subject: [PATCH 094/297] scsi: bfa: Increase requested firmware version to
 3.2.5.1

bna & bfa firmware version 3.2.5.1 was submitted to linux-firmware on
Feb 17 19:10:20 2015 -0500 in 0ab54ff1dc ("linux-firmware: Add QLogic BR
Series Adapter Firmware").

bna was updated to use the newer firmware on Feb 19 16:02:32 2015 -0500 in
3f307c3d70 ("bna: Update the Driver and Firmware Version")

bfa was not updated. I presume this was an oversight but it broke support
for bfa+bna cards such as the following
	04:00.0 Fibre Channel [0c04]: Brocade Communications Systems, Inc.
		1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)
	04:00.1 Fibre Channel [0c04]: Brocade Communications Systems, Inc.
		1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)
	04:00.2 Ethernet controller [0200]: Brocade Communications Systems,
		Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)
	04:00.3 Ethernet controller [0200]: Brocade Communications Systems,
		Inc. 1010/1020/1007/1741 10Gbps CNA [1657:0014] (rev 01)

Currently, if the bfa module is loaded first, bna fails to probe the
respective devices with
[  215.026787] bna: QLogic BR-series 10G Ethernet driver - version: 3.2.25.1
[  215.043707] bna 0000:04:00.2: bar0 mapped to ffffc90001fc0000, len 262144
[  215.060656] bna 0000:04:00.2: initialization failed err=1
[  215.073893] bna 0000:04:00.3: bar0 mapped to ffffc90002040000, len 262144
[  215.090644] bna 0000:04:00.3: initialization failed err=1

Whereas if bna is loaded first, bfa fails with
[  249.592109] QLogic BR-series BFA FC/FCOE SCSI driver - version: 3.2.25.0
[  249.610738] bfa 0000:04:00.0: Running firmware version is incompatible with the driver version
[  249.833513] bfa 0000:04:00.0: bfa init failed
[  249.833919] scsi host6: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.0 driver: 3.2.25.0
[  249.841446] bfa 0000:04:00.1: Running firmware version is incompatible with the driver version
[  250.045449] bfa 0000:04:00.1: bfa init failed
[  250.045962] scsi host7: QLogic BR-series FC/FCOE Adapter, hwpath: 0000:04:00.1 driver: 3.2.25.0

Increase bfa's requested firmware version. Also increase the driver
version.  I only tested that all of the devices probe without error.

Reported-by: Tim Ehlers <tehlers@gwdg.de>
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Rasesh Mody <rasesh.mody@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bfa/bfad.c     | 6 +++---
 drivers/scsi/bfa/bfad_drv.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 9d253cb83ee7..e70410beb83a 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -64,9 +64,9 @@ int		max_rport_logins = BFA_FCS_MAX_RPORT_LOGINS;
 u32	bfi_image_cb_size, bfi_image_ct_size, bfi_image_ct2_size;
 u32	*bfi_image_cb, *bfi_image_ct, *bfi_image_ct2;
 
-#define BFAD_FW_FILE_CB		"cbfw-3.2.3.0.bin"
-#define BFAD_FW_FILE_CT		"ctfw-3.2.3.0.bin"
-#define BFAD_FW_FILE_CT2	"ct2fw-3.2.3.0.bin"
+#define BFAD_FW_FILE_CB		"cbfw-3.2.5.1.bin"
+#define BFAD_FW_FILE_CT		"ctfw-3.2.5.1.bin"
+#define BFAD_FW_FILE_CT2	"ct2fw-3.2.5.1.bin"
 
 static u32 *bfad_load_fwimg(struct pci_dev *pdev);
 static void bfad_free_fwimg(void);
diff --git a/drivers/scsi/bfa/bfad_drv.h b/drivers/scsi/bfa/bfad_drv.h
index f9e862093a25..cfcfff48e8e1 100644
--- a/drivers/scsi/bfa/bfad_drv.h
+++ b/drivers/scsi/bfa/bfad_drv.h
@@ -58,7 +58,7 @@
 #ifdef BFA_DRIVER_VERSION
 #define BFAD_DRIVER_VERSION    BFA_DRIVER_VERSION
 #else
-#define BFAD_DRIVER_VERSION    "3.2.25.0"
+#define BFAD_DRIVER_VERSION    "3.2.25.1"
 #endif
 
 #define BFAD_PROTO_NAME FCPI_NAME

From a33d331761bc5dd330499ca5ceceb67f0640a8e6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 5 Jan 2017 10:26:38 +0100
Subject: [PATCH 095/297] x86/CPU/AMD: Fix Bulldozer topology

The following commit:

  8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id")

... broke the initial strategy for Bulldozer-based cores' topology,
where we consider each thread of a compute unit a standalone core
and not a HT or SMT thread.

Revert to the firmware-supplied core_id numbering and do not make
them thread siblings as we don't consider them for such even if they
technically are, more or less.

Reported-and-tested-by: Brice Goglin <Brice.Goglin@inria.fr>
Tested-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # v4.6+
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id")
Link: http://lkml.kernel.org/r/20170105092638.5247-1-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 71cae73a5076..1d3167269a67 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,15 +309,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		u32 eax, ebx, ecx, edx;
 
-		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-		node_id = ecx & 7;
-
-		/* get compute unit information */
-		smp_num_siblings = ((ebx >> 8) & 3) + 1;
-		c->x86_max_cores /= smp_num_siblings;
-		c->cpu_core_id = ebx & 0xff;
+		node_id = cpuid_ecx(0x8000001e) & 7;
 
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we

From 1ebb71143758f45dc0fa76e2f48429e13b16d110 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 6 Jan 2017 15:33:36 +0100
Subject: [PATCH 096/297] HID: hid-cypress: validate length of report

Make sure we have enough of a report structure to validate before
looking at it.

Reported-by: Benoit Camredon <benoit.camredon@airbus.com>
Tested-by: Benoit Camredon <benoit.camredon@airbus.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-cypress.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c
index 1b764d1745f3..1689568b597d 100644
--- a/drivers/hid/hid-cypress.c
+++ b/drivers/hid/hid-cypress.c
@@ -39,6 +39,9 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 	if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX))
 		return rdesc;
 
+	if (*rsize < 4)
+		return rdesc;
+
 	for (i = 0; i < *rsize - 4; i++)
 		if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) {
 			rdesc[i] = 0x19;

From bc65a326c579e93a5c2120a65ede72f11369ee5a Mon Sep 17 00:00:00 2001
From: Jeeja KP <jeeja.kp@intel.com>
Date: Mon, 2 Jan 2017 09:50:05 +0530
Subject: [PATCH 097/297] ASoC: Intel: Skylake: Release FW ctx in cleanup

Saved firmware ctx was not never released, so release Firmware
ctx in cleanup routine.

Signed-off-by: Jeeja KP <jeeja.kp@intel.com>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/skylake/skl-sst.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c
index 8fc3178bc79c..b30bd384c8d3 100644
--- a/sound/soc/intel/skylake/skl-sst.c
+++ b/sound/soc/intel/skylake/skl-sst.c
@@ -515,6 +515,9 @@ EXPORT_SYMBOL_GPL(skl_sst_init_fw);
 
 void skl_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx)
 {
+
+	if (ctx->dsp->fw)
+		release_firmware(ctx->dsp->fw);
 	skl_clear_module_table(ctx->dsp);
 	skl_freeup_uuid_list(ctx);
 	skl_ipc_free(&ctx->ipc);

From 9f169b9f52a4afccdab7a7d2311b0c53a78a1e6b Mon Sep 17 00:00:00 2001
From: Patrick Lai <plai@codeaurora.org>
Date: Sat, 31 Dec 2016 22:44:39 -0800
Subject: [PATCH 098/297] ASoC: dpcm: Avoid putting stream state to STOP when
 FE stream is paused

When multiple front-ends are using the same back-end, putting state of a
front-end to STOP state upon receiving pause command will result in backend
stream getting released by DPCM framework unintentionally. In order to
avoid backend to be released when another active front-end stream is
present, put the stream state to PAUSED state instead of STOP state.

Signed-off-by: Patrick Lai <plai@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-pcm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index e7a1eaa2772f..6aba14009c92 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2184,9 +2184,11 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd)
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
-	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 		fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP;
 		break;
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED;
+		break;
 	}
 
 out:

From 978d3639fd13d987950e4ce85c8737ae92154b2c Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Wed, 4 Jan 2017 22:18:24 +0300
Subject: [PATCH 099/297] sh_eth: fix EESIPR values for SH77{34|63}

As the SH77{34|63} manuals are freely available,  I've checked the EESIPR
values written against the manuals, and they appeared to set the reserved
bits 11-15 (which should be 0 on write). Fix those EESIPR values.

Fixes: 380af9e390ec ("net: sh_eth: CPU dependency code collect to "struct sh_eth_cpu_data"")
Fixes: f5d12767c8fd ("sh_eth: get SH77{34|63} support out of #ifdef")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 00fafabab1d0..3b388f5704c5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -802,7 +802,7 @@ static struct sh_eth_cpu_data sh7734_data = {
 
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
-	.eesipr_value	= DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
+	.eesipr_value	= DMAC_M_RFRMER | DMAC_M_ECI | 0x003f07ff,
 
 	.tx_check	= EESR_TC1 | EESR_FTC,
 	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
@@ -832,7 +832,7 @@ static struct sh_eth_cpu_data sh7763_data = {
 
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
-	.eesipr_value	= DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
+	.eesipr_value	= DMAC_M_RFRMER | DMAC_M_ECI | 0x003f07ff,
 
 	.tx_check	= EESR_TC1 | EESR_FTC,
 	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |

From 0f1f9cbc04dbb3cc310f70a11cba0cf1f2109d9c Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Thu, 5 Jan 2017 00:29:32 +0300
Subject: [PATCH 100/297] sh_eth: R8A7740 supports packet shecksumming

The R8A7740 GEther controller supports the packet checksum offloading
but the 'hw_crc' (bad name, I'll fix it) flag isn't set in the R8A7740
data,  thus CSMR isn't cleared...

Fixes: 73a0d907301e ("net: sh_eth: add support R8A7740")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 3b388f5704c5..f729a6b43958 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -574,6 +574,7 @@ static struct sh_eth_cpu_data r8a7740_data = {
 	.rpadir_value   = 2 << 16,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.hw_crc		= 1,
 	.tsu		= 1,
 	.select_mii	= 1,
 	.shift_rd0	= 1,

From 896b4db685cf06bd7d50ed22c53ebd069e0b90e9 Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Wed, 4 Jan 2017 15:07:16 -0600
Subject: [PATCH 101/297] amd-xgbe: Fix IRQ processing when running in single
 IRQ mode

When running in single IRQ mode, the additional IRQ routines were being
skipped because only the XGMAC interrupt status was being checked.
Update the code so that the additional IRQ routines are checked whenever
an interrupt is received.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 155190db682d..9943629fcbf9 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -539,6 +539,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 		}
 	}
 
+isr_done:
 	/* If there is not a separate AN irq, handle it here */
 	if (pdata->dev_irq == pdata->an_irq)
 		pdata->phy_if.an_isr(irq, pdata);
@@ -551,7 +552,6 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 	if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq))
 		pdata->i2c_if.i2c_isr(irq, pdata);
 
-isr_done:
 	return IRQ_HANDLED;
 }
 

From 5ca7d1ca77dc23934504b95a96d2660d345f83c2 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 5 Jan 2017 14:48:07 -0600
Subject: [PATCH 102/297] net: phy: dp83867: fix irq generation

For proper IRQ generation by DP83867 phy the INT/PWDN pin has to be
programmed as an interrupt output instead of a Powerdown input in
Configuration Register 3 (CFG3), Address 0x001E, bit 7 INT_OE = 1. The
current driver doesn't do this and as result IRQs will not be generated by
DP83867 phy even if they are properly configured in DT.

Hence, fix IRQ generation by properly configuring CFG3.INT_OE bit and
ensure that Link Status Change (LINK_STATUS_CHNG_INT) and Auto-Negotiation
Complete (AUTONEG_COMP_INT) interrupt are enabled. After this the DP83867
driver will work properly in interrupt enabled mode.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83867.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 1b639242f9e2..e84ae084e259 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -29,6 +29,7 @@
 #define MII_DP83867_MICR	0x12
 #define MII_DP83867_ISR		0x13
 #define DP83867_CTRL		0x1f
+#define DP83867_CFG3		0x1e
 
 /* Extended Registers */
 #define DP83867_RGMIICTL	0x0032
@@ -98,6 +99,8 @@ static int dp83867_config_intr(struct phy_device *phydev)
 		micr_status |=
 			(MII_DP83867_MICR_AN_ERR_INT_EN |
 			MII_DP83867_MICR_SPEED_CHNG_INT_EN |
+			MII_DP83867_MICR_AUTONEG_COMP_INT_EN |
+			MII_DP83867_MICR_LINK_STS_CHNG_INT_EN |
 			MII_DP83867_MICR_DUP_MODE_CHNG_INT_EN |
 			MII_DP83867_MICR_SLEEP_MODE_CHNG_INT_EN);
 
@@ -214,6 +217,13 @@ static int dp83867_config_init(struct phy_device *phydev)
 		}
 	}
 
+	/* Enable Interrupt output INT_OE in CFG3 register */
+	if (phy_interrupt_is_valid(phydev)) {
+		val = phy_read(phydev, DP83867_CFG3);
+		val |= BIT(7);
+		phy_write(phydev, DP83867_CFG3, val);
+	}
+
 	return 0;
 }
 

From 93e246f783e6bd1bc64fdfbfe68b18161f69b28e Mon Sep 17 00:00:00 2001
From: David Forster <dforster@brocade.com>
Date: Fri, 6 Jan 2017 10:27:59 +0000
Subject: [PATCH 103/297] vti6: fix device register to report IFLA_INFO_KIND

vti6 interface is registered before the rtnl_link_ops block
is attached. As a result the resulting RTM_NEWLINK is missing
IFLA_INFO_KIND. Re-order attachment of rtnl_link_ops block to fix.

Signed-off-by: Dave Forster <dforster@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_vti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index f4b4a4a5f4ba..d82042c8d8fd 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -189,12 +189,12 @@ static int vti6_tnl_create2(struct net_device *dev)
 	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 	int err;
 
+	dev->rtnl_link_ops = &vti6_link_ops;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out;
 
 	strcpy(t->parms.name, dev->name);
-	dev->rtnl_link_ops = &vti6_link_ops;
 
 	dev_hold(dev);
 	vti6_tnl_link(ip6n, t);

From 7f4c4f80fd22ec7722e778c1d099e828d2b5dc40 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 19 Dec 2016 13:30:13 -0500
Subject: [PATCH 104/297] MAINTAINERS: Update mailing list for radeon and
 amdgpu

amdgpu and radeon development has moved to this list.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c7b8cf1240d9..cb4611867a88 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4116,7 +4116,7 @@ F:	drivers/gpu/drm/cirrus/
 RADEON and AMDGPU DRM DRIVERS
 M:	Alex Deucher <alexander.deucher@amd.com>
 M:	Christian König <christian.koenig@amd.com>
-L:	dri-devel@lists.freedesktop.org
+L:	amd-gfx@lists.freedesktop.org
 T:	git git://people.freedesktop.org/~agd5f/linux
 S:	Supported
 F:	drivers/gpu/drm/radeon/

From c4642a479fac9f5c224ff7425d86c427b94011af Mon Sep 17 00:00:00 2001
From: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Wed, 14 Dec 2016 15:32:28 -0500
Subject: [PATCH 105/297] drm/amd/amdgpu: add Polaris12 support (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: agd: squash in various fixes
v3: agd: squash in:
drm/amdgpu: remove unnecessary smc sk firmware for polaris12

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Ken Wang  <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c    |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  5 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  5 ++++
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c     | 13 ++++++---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 31 ++++++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c      |  5 ++++
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c     |  6 +++++
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c      |  5 ++--
 drivers/gpu/drm/amd/amdgpu/vi.c            | 10 +++++++
 drivers/gpu/drm/amd/include/amd_shared.h   |  1 +
 11 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 9ada56c16a58..4c851fde1e82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -840,6 +840,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 				else if (type == CGS_UCODE_ID_SMU_SK)
 					strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
 				break;
+			case CHIP_POLARIS12:
+				strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+				break;
 			default:
 				DRM_ERROR("SMC firmware not supported\n");
 				return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 60bd4afe45c8..fe3bb94fe58d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -73,6 +73,7 @@ static const char *amdgpu_asic_name[] = {
 	"STONEY",
 	"POLARIS10",
 	"POLARIS11",
+	"POLARIS12",
 	"LAST",
 };
 
@@ -1277,6 +1278,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 	case CHIP_FIJI:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS12:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index a81dfaeeb8c0..1d564beb0fde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -65,6 +65,7 @@
 #define FIRMWARE_STONEY		"amdgpu/stoney_uvd.bin"
 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_uvd.bin"
 #define FIRMWARE_POLARIS11	"amdgpu/polaris11_uvd.bin"
+#define FIRMWARE_POLARIS12	"amdgpu/polaris12_uvd.bin"
 
 /**
  * amdgpu_uvd_cs_ctx - Command submission parser context
@@ -98,6 +99,7 @@ MODULE_FIRMWARE(FIRMWARE_FIJI);
 MODULE_FIRMWARE(FIRMWARE_STONEY);
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+MODULE_FIRMWARE(FIRMWARE_POLARIS12);
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
@@ -149,6 +151,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	case CHIP_POLARIS11:
 		fw_name = FIRMWARE_POLARIS11;
 		break;
+	case CHIP_POLARIS12:
+		fw_name = FIRMWARE_POLARIS12;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 69b66b9e7f57..8fec802d3908 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -52,6 +52,7 @@
 #define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
 #define FIRMWARE_POLARIS11         "amdgpu/polaris11_vce.bin"
+#define FIRMWARE_POLARIS12         "amdgpu/polaris12_vce.bin"
 
 #ifdef CONFIG_DRM_AMDGPU_CIK
 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -66,6 +67,7 @@ MODULE_FIRMWARE(FIRMWARE_FIJI);
 MODULE_FIRMWARE(FIRMWARE_STONEY);
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+MODULE_FIRMWARE(FIRMWARE_POLARIS12);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 
@@ -121,6 +123,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	case CHIP_POLARIS11:
 		fw_name = FIRMWARE_POLARIS11;
 		break;
+	case CHIP_POLARIS12:
+		fw_name = FIRMWARE_POLARIS12;
+		break;
 
 	default:
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index b3d62b909f43..2006abbbfb62 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -167,6 +167,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		amdgpu_program_register_sequence(adev,
 						 polaris11_golden_settings_a11,
 						 (const u32)ARRAY_SIZE(polaris11_golden_settings_a11));
@@ -608,6 +609,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
 		num_crtc = 6;
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		num_crtc = 5;
 		break;
 	default:
@@ -1589,6 +1591,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev)
 		adev->mode_info.audio.num_pins = 8;
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		adev->mode_info.audio.num_pins = 6;
 		break;
 	default:
@@ -2388,7 +2391,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
 	int pll;
 
 	if ((adev->asic_type == CHIP_POLARIS10) ||
-	    (adev->asic_type == CHIP_POLARIS11)) {
+	    (adev->asic_type == CHIP_POLARIS11) ||
+	    (adev->asic_type == CHIP_POLARIS12)) {
 		struct amdgpu_encoder *amdgpu_encoder =
 			to_amdgpu_encoder(amdgpu_crtc->encoder);
 		struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
@@ -2822,7 +2826,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
 		return -EINVAL;
 
 	if ((adev->asic_type == CHIP_POLARIS10) ||
-	    (adev->asic_type == CHIP_POLARIS11)) {
+	    (adev->asic_type == CHIP_POLARIS11) ||
+	    (adev->asic_type == CHIP_POLARIS12)) {
 		struct amdgpu_encoder *amdgpu_encoder =
 			to_amdgpu_encoder(amdgpu_crtc->encoder);
 		int encoder_mode =
@@ -2992,6 +2997,7 @@ static int dce_v11_0_early_init(void *handle)
 		adev->mode_info.num_dig = 6;
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		adev->mode_info.num_hpd = 5;
 		adev->mode_info.num_dig = 5;
 		break;
@@ -3101,7 +3107,8 @@ static int dce_v11_0_hw_init(void *handle)
 	amdgpu_atombios_crtc_powergate_init(adev);
 	amdgpu_atombios_encoder_init_dig(adev);
 	if ((adev->asic_type == CHIP_POLARIS10) ||
-	    (adev->asic_type == CHIP_POLARIS11)) {
+	    (adev->asic_type == CHIP_POLARIS11) ||
+	    (adev->asic_type == CHIP_POLARIS12)) {
 		amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
 						   DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
 		amdgpu_atombios_crtc_set_dce_clock(adev, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index d0ec00986f38..373374164bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -139,6 +139,13 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 
+MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
+
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@@ -689,6 +696,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		amdgpu_program_register_sequence(adev,
 						 golden_settings_polaris11_a11,
 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -903,6 +911,9 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_POLARIS10:
 		chip_name = "polaris10";
 		break;
+	case CHIP_POLARIS12:
+		chip_name = "polaris12";
+		break;
 	case CHIP_STONEY:
 		chip_name = "stoney";
 		break;
@@ -1768,6 +1779,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		ret = amdgpu_atombios_get_gfx_info(adev);
 		if (ret)
 			return ret;
@@ -2682,6 +2694,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
 
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@@ -3503,6 +3516,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
 		*rconf1 |= 0x0;
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
 			  SE_XSEL(1) | SE_YSEL(1);
 		*rconf1 |= 0x0;
@@ -4021,7 +4035,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 			cz_enable_cp_power_gating(adev, true);
 		else
 			cz_enable_cp_power_gating(adev, false);
-	} else if (adev->asic_type == CHIP_POLARIS11) {
+	} else if ((adev->asic_type == CHIP_POLARIS11) ||
+		   (adev->asic_type == CHIP_POLARIS12)) {
 		gfx_v8_0_init_csb(adev);
 		gfx_v8_0_init_save_restore_list(adev);
 		gfx_v8_0_enable_save_restore_machine(adev);
@@ -4095,7 +4110,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
 		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
 	if (adev->asic_type == CHIP_POLARIS11 ||
-	    adev->asic_type == CHIP_POLARIS10) {
+	    adev->asic_type == CHIP_POLARIS10 ||
+	    adev->asic_type == CHIP_POLARIS12) {
 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
 		tmp &= ~0x3;
 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
@@ -4283,6 +4299,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 		amdgpu_ring_write(ring, 0x0000002A);
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		amdgpu_ring_write(ring, 0x16000012);
 		amdgpu_ring_write(ring, 0x00000000);
 		break;
@@ -4664,7 +4681,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
 			    (adev->asic_type == CHIP_FIJI) ||
 			    (adev->asic_type == CHIP_STONEY) ||
 			    (adev->asic_type == CHIP_POLARIS11) ||
-			    (adev->asic_type == CHIP_POLARIS10)) {
+			    (adev->asic_type == CHIP_POLARIS10) ||
+			    (adev->asic_type == CHIP_POLARIS12)) {
 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
 				       AMDGPU_DOORBELL_KIQ << 2);
 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
@@ -4700,7 +4718,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
 		mqd->cp_hqd_persistent_state = tmp;
 		if (adev->asic_type == CHIP_STONEY ||
 			adev->asic_type == CHIP_POLARIS11 ||
-			adev->asic_type == CHIP_POLARIS10) {
+			adev->asic_type == CHIP_POLARIS10 ||
+			adev->asic_type == CHIP_POLARIS12) {
 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
@@ -5279,7 +5298,8 @@ static int gfx_v8_0_late_init(void *handle)
 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
 						       bool enable)
 {
-	if (adev->asic_type == CHIP_POLARIS11)
+	if ((adev->asic_type == CHIP_POLARIS11) ||
+	    (adev->asic_type == CHIP_POLARIS12))
 		/* Send msg to SMU via Powerplay */
 		amdgpu_set_powergating_state(adev,
 					     AMD_IP_BLOCK_TYPE_SMC,
@@ -5353,6 +5373,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
 		else
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 0daac3a5be79..476bc9f1954b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -46,6 +46,7 @@ static int gmc_v8_0_wait_for_idle(void *handle);
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
 {
@@ -130,6 +131,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		amdgpu_program_register_sequence(adev,
 						 golden_settings_polaris11_a11,
 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -225,6 +227,9 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_POLARIS10:
 		chip_name = "polaris10";
 		break;
+	case CHIP_POLARIS12:
+		chip_name = "polaris12";
+		break;
 	case CHIP_FIJI:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 1170a64a3184..034ace79ed49 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -60,6 +60,8 @@ MODULE_FIRMWARE("amdgpu/polaris10_sdma.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_sdma1.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
 
 
 static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -206,6 +208,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		amdgpu_program_register_sequence(adev,
 						 golden_settings_polaris11_a11,
 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -278,6 +281,9 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_POLARIS10:
 		chip_name = "polaris10";
 		break;
+	case CHIP_POLARIS12:
+		chip_name = "polaris12";
+		break;
 	case CHIP_CARRIZO:
 		chip_name = "carrizo";
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6b3293a1c7b8..5fb0b7f5c065 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -320,11 +320,12 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
 {
 	u32 tmp;
 
-	/* Fiji, Stoney, Polaris10, Polaris11 are single pipe */
+	/* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
 	if ((adev->asic_type == CHIP_FIJI) ||
 	    (adev->asic_type == CHIP_STONEY) ||
 	    (adev->asic_type == CHIP_POLARIS10) ||
-	    (adev->asic_type == CHIP_POLARIS11))
+	    (adev->asic_type == CHIP_POLARIS11) ||
+	    (adev->asic_type == CHIP_POLARIS12))
 		return AMDGPU_VCE_HARVEST_VCE1;
 
 	/* Tonga and CZ are dual or single pipe */
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index bf088d6d9bf1..c2ac54f11341 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -88,6 +88,7 @@ MODULE_FIRMWARE("amdgpu/polaris10_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_smc.bin");
 
 /*
  * Indirect registers accessor
@@ -312,6 +313,7 @@ static void vi_init_golden_registers(struct amdgpu_device *adev)
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS12:
 	default:
 		break;
 	}
@@ -671,6 +673,7 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 	case CHIP_TONGA:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS12:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 		asic_register_table = cz_allowed_read_registers;
@@ -994,6 +997,11 @@ static int vi_common_early_init(void *handle)
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x50;
 		break;
+	case CHIP_POLARIS12:
+		adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG;
+		adev->pg_flags = 0;
+		adev->external_rev_id = adev->rev_id + 0x64;
+		break;
 	case CHIP_CARRIZO:
 		adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
 			AMD_CG_SUPPORT_GFX_MGCG |
@@ -1346,6 +1354,7 @@ static int vi_common_set_clockgating_state(void *handle,
 	case CHIP_TONGA:
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		vi_common_set_clockgating_state_by_smu(adev, state);
 	default:
 		break;
@@ -1429,6 +1438,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS12:
 		amdgpu_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_ip_block_add(adev, &gmc_v8_1_ip_block);
 		amdgpu_ip_block_add(adev, &tonga_ih_ip_block);
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index c02469ada9f1..5f59117109d7 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -46,6 +46,7 @@ enum amd_asic_type {
 	CHIP_STONEY,
 	CHIP_POLARIS10,
 	CHIP_POLARIS11,
+	CHIP_POLARIS12,
 	CHIP_LAST,
 };
 

From f4309526576db325264b6dc9ee150ee70b330a42 Mon Sep 17 00:00:00 2001
From: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Wed, 14 Dec 2016 15:40:48 -0500
Subject: [PATCH 106/297] drm/amdgpu/powerplay: add Polaris12 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Ken Wang  <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c        | 1 +
 drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c          | 3 ++-
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c | 2 +-
 drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c        | 1 +
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index fc592c2b0e16..95a568df8551 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -98,6 +98,7 @@ static int amdgpu_pp_early_init(void *handle)
 	switch (adev->asic_type) {
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS12:
 	case CHIP_TONGA:
 	case CHIP_FIJI:
 	case CHIP_TOPAZ:
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index dc6700aee18f..b03606405a53 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -95,6 +95,7 @@ int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 			break;
 		case CHIP_POLARIS11:
 		case CHIP_POLARIS10:
+		case CHIP_POLARIS12:
 			polaris_set_asic_special_caps(hwmgr);
 			hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK);
 			break;
@@ -745,7 +746,7 @@ int polaris_set_asic_special_caps(struct pp_hwmgr *hwmgr)
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_TablelessHardwareInterface);
 
-	if (hwmgr->chip_id == CHIP_POLARIS11)
+	if ((hwmgr->chip_id == CHIP_POLARIS11) || (hwmgr->chip_id == CHIP_POLARIS12))
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_SPLLShutdownSupport);
 	return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
index 26477f0f09dc..6cd1287a7a8f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
@@ -521,7 +521,7 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr)
 				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
 				result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris10);
 				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
-			} else if (hwmgr->chip_id == CHIP_POLARIS11) {
+			} else if ((hwmgr->chip_id == CHIP_POLARIS11) || (hwmgr->chip_id == CHIP_POLARIS12)) {
 				result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
 				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
 				result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index e5812aa456f3..6e618aa20719 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -65,6 +65,7 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 			break;
 		case CHIP_POLARIS11:
 		case CHIP_POLARIS10:
+		case CHIP_POLARIS12:
 			polaris10_smum_init(smumgr);
 			break;
 		default:

From fc8e9c54699e42754094ff475da46440778d8f19 Mon Sep 17 00:00:00 2001
From: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Thu, 4 Aug 2016 12:54:22 +0800
Subject: [PATCH 107/297] drm/amd/amdgpu: add Polaris12 PCI ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Ken Wang  <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8cb937b2bfcc..2534adaebe30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -418,6 +418,13 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
 	{0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
 	{0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+	/* Polaris12 */
+	{0x1002, 0x6980, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 
 	{0, 0, 0}
 };

From d6df71e125b4e4ab8932349ce81e09ef73304b91 Mon Sep 17 00:00:00 2001
From: Yintian Tao <yttao@amd.com>
Date: Wed, 21 Dec 2016 14:32:21 +0800
Subject: [PATCH 108/297] drm/amdgpu: remove static integer for uvd pp state

At two gpu core condition, static integer will cause that second gpu
core uvd state setting will be directly skipped due to the first one
setting

Signed-off-by: Yintian Tao <yttao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index a79e283590fb..6de6becce745 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -791,15 +791,10 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
-	static int curstate = -1;
 
 	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
 		return 0;
 
-	if (curstate == state)
-		return 0;
-
-	curstate = state;
 	if (enable) {
 		/* wait for STATUS to clear */
 		if (uvd_v5_0_wait_for_idle(handle))

From 70fd80d6f7e37bf637331c682fafcce1112750ac Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 21 Dec 2016 17:44:59 +0800
Subject: [PATCH 109/297] drm/amd/powerplay: extend smu's response timeout
 time.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/amd_shared.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 5f59117109d7..85f358764bbc 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -23,7 +23,7 @@
 #ifndef __AMD_SHARED_H__
 #define __AMD_SHARED_H__
 
-#define AMD_MAX_USEC_TIMEOUT		100000  /* 100 ms */
+#define AMD_MAX_USEC_TIMEOUT		200000  /* 200 ms */
 
 /*
  * Supported ASIC types

From 5165484b02f2cbedb5bf3a41ff5e8ae16069016c Mon Sep 17 00:00:00 2001
From: Flora Cui <Flora.Cui@amd.com>
Date: Thu, 15 Dec 2016 13:43:59 +0800
Subject: [PATCH 110/297] drm/amdgpu: update si kicker smc firmware

Use the appropriate smc firmware for each chip revision.
Using the wrong one can cause stability issues.

Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Flora Cui <Flora.Cui@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 57 ++++++++++++++---------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 6c65a1a2de79..0566e9adaf8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -56,7 +56,6 @@
 #define BIOS_SCRATCH_4                                    0x5cd
 
 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
-MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
 MODULE_FIRMWARE("radeon/verde_smc.bin");
@@ -7687,49 +7686,49 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev)
 		chip_name = "tahiti";
 		break;
 	case CHIP_PITCAIRN:
-		if ((adev->pdev->revision == 0x81) ||
-		    (adev->pdev->device == 0x6810) ||
-		    (adev->pdev->device == 0x6811) ||
-		    (adev->pdev->device == 0x6816) ||
-		    (adev->pdev->device == 0x6817) ||
-		    (adev->pdev->device == 0x6806))
+		if ((adev->pdev->revision == 0x81) &&
+		    ((adev->pdev->device == 0x6810) ||
+		    (adev->pdev->device == 0x6811)))
 			chip_name = "pitcairn_k";
 		else
 			chip_name = "pitcairn";
 		break;
 	case CHIP_VERDE:
-		if ((adev->pdev->revision == 0x81) ||
-		    (adev->pdev->revision == 0x83) ||
-		    (adev->pdev->revision == 0x87) ||
-		    (adev->pdev->device == 0x6820) ||
-		    (adev->pdev->device == 0x6821) ||
-		    (adev->pdev->device == 0x6822) ||
-		    (adev->pdev->device == 0x6823) ||
-		    (adev->pdev->device == 0x682A) ||
-		    (adev->pdev->device == 0x682B))
+		if (((adev->pdev->device == 0x6820) &&
+			((adev->pdev->revision == 0x81) ||
+			(adev->pdev->revision == 0x83))) ||
+		    ((adev->pdev->device == 0x6821) &&
+			((adev->pdev->revision == 0x83) ||
+			(adev->pdev->revision == 0x87))) ||
+		    ((adev->pdev->revision == 0x87) &&
+			((adev->pdev->device == 0x6823) ||
+			(adev->pdev->device == 0x682b))))
 			chip_name = "verde_k";
 		else
 			chip_name = "verde";
 		break;
 	case CHIP_OLAND:
-		if ((adev->pdev->revision == 0xC7) ||
-		    (adev->pdev->revision == 0x80) ||
-		    (adev->pdev->revision == 0x81) ||
-		    (adev->pdev->revision == 0x83) ||
-		    (adev->pdev->revision == 0x87) ||
-		    (adev->pdev->device == 0x6604) ||
-		    (adev->pdev->device == 0x6605))
+		if (((adev->pdev->revision == 0x81) &&
+			((adev->pdev->device == 0x6600) ||
+			(adev->pdev->device == 0x6604) ||
+			(adev->pdev->device == 0x6605) ||
+			(adev->pdev->device == 0x6610))) ||
+		    ((adev->pdev->revision == 0x83) &&
+			(adev->pdev->device == 0x6610)))
 			chip_name = "oland_k";
 		else
 			chip_name = "oland";
 		break;
 	case CHIP_HAINAN:
-		if ((adev->pdev->revision == 0x81) ||
-		    (adev->pdev->revision == 0x83) ||
-		    (adev->pdev->revision == 0xC3) ||
-		    (adev->pdev->device == 0x6664) ||
-		    (adev->pdev->device == 0x6665) ||
-		    (adev->pdev->device == 0x6667))
+		if (((adev->pdev->revision == 0x81) &&
+			(adev->pdev->device == 0x6660)) ||
+		    ((adev->pdev->revision == 0x83) &&
+			((adev->pdev->device == 0x6660) ||
+			(adev->pdev->device == 0x6663) ||
+			(adev->pdev->device == 0x6665) ||
+			(adev->pdev->device == 0x6667))) ||
+		    ((adev->pdev->revision == 0xc3) &&
+			(adev->pdev->device == 0x6665)))
 			chip_name = "hainan_k";
 		else
 			chip_name = "hainan";

From 6458bd4dfd9414cba5804eb9907fe2a824278c34 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 5 Jan 2017 12:15:52 -0500
Subject: [PATCH 111/297] drm/radeon: update smc firmware selection for SI

Use the appropriate smc firmware for each chip revision.
Using the wrong one can cause stability issues.

Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/si.c | 60 +++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ad4d7b8b8322..e8a38d296855 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -50,7 +50,6 @@ MODULE_FIRMWARE("radeon/tahiti_ce.bin");
 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
-MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
@@ -1657,9 +1656,6 @@ static int si_init_microcode(struct radeon_device *rdev)
 	switch (rdev->family) {
 	case CHIP_TAHITI:
 		chip_name = "TAHITI";
-		/* XXX: figure out which Tahitis need the new ucode */
-		if (0)
-			new_smc = true;
 		new_chip_name = "tahiti";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1671,12 +1667,9 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_PITCAIRN:
 		chip_name = "PITCAIRN";
-		if ((rdev->pdev->revision == 0x81) ||
-		    (rdev->pdev->device == 0x6810) ||
-		    (rdev->pdev->device == 0x6811) ||
-		    (rdev->pdev->device == 0x6816) ||
-		    (rdev->pdev->device == 0x6817) ||
-		    (rdev->pdev->device == 0x6806))
+		if ((rdev->pdev->revision == 0x81) &&
+		    ((rdev->pdev->device == 0x6810) ||
+		     (rdev->pdev->device == 0x6811)))
 			new_smc = true;
 		new_chip_name = "pitcairn";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
@@ -1689,15 +1682,15 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_VERDE:
 		chip_name = "VERDE";
-		if ((rdev->pdev->revision == 0x81) ||
-		    (rdev->pdev->revision == 0x83) ||
-		    (rdev->pdev->revision == 0x87) ||
-		    (rdev->pdev->device == 0x6820) ||
-		    (rdev->pdev->device == 0x6821) ||
-		    (rdev->pdev->device == 0x6822) ||
-		    (rdev->pdev->device == 0x6823) ||
-		    (rdev->pdev->device == 0x682A) ||
-		    (rdev->pdev->device == 0x682B))
+		if (((rdev->pdev->device == 0x6820) &&
+		     ((rdev->pdev->revision == 0x81) ||
+		      (rdev->pdev->revision == 0x83))) ||
+		    ((rdev->pdev->device == 0x6821) &&
+		     ((rdev->pdev->revision == 0x83) ||
+		      (rdev->pdev->revision == 0x87))) ||
+		    ((rdev->pdev->revision == 0x87) &&
+		     ((rdev->pdev->device == 0x6823) ||
+		      (rdev->pdev->device == 0x682b))))
 			new_smc = true;
 		new_chip_name = "verde";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
@@ -1710,13 +1703,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_OLAND:
 		chip_name = "OLAND";
-		if ((rdev->pdev->revision == 0xC7) ||
-		    (rdev->pdev->revision == 0x80) ||
-		    (rdev->pdev->revision == 0x81) ||
-		    (rdev->pdev->revision == 0x83) ||
-		    (rdev->pdev->revision == 0x87) ||
-		    (rdev->pdev->device == 0x6604) ||
-		    (rdev->pdev->device == 0x6605))
+		if (((rdev->pdev->revision == 0x81) &&
+		     ((rdev->pdev->device == 0x6600) ||
+		      (rdev->pdev->device == 0x6604) ||
+		      (rdev->pdev->device == 0x6605) ||
+		      (rdev->pdev->device == 0x6610))) ||
+		    ((rdev->pdev->revision == 0x83) &&
+		     (rdev->pdev->device == 0x6610)))
 			new_smc = true;
 		new_chip_name = "oland";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
@@ -1728,12 +1721,15 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_HAINAN:
 		chip_name = "HAINAN";
-		if ((rdev->pdev->revision == 0x81) ||
-		    (rdev->pdev->revision == 0x83) ||
-		    (rdev->pdev->revision == 0xC3) ||
-		    (rdev->pdev->device == 0x6664) ||
-		    (rdev->pdev->device == 0x6665) ||
-		    (rdev->pdev->device == 0x6667))
+		if (((rdev->pdev->revision == 0x81) &&
+		     (rdev->pdev->device == 0x6660)) ||
+		    ((rdev->pdev->revision == 0x83) &&
+		     ((rdev->pdev->device == 0x6660) ||
+		      (rdev->pdev->device == 0x6663) ||
+		      (rdev->pdev->device == 0x6665) ||
+		      (rdev->pdev->device == 0x6667))) ||
+		    ((rdev->pdev->revision == 0xc3) &&
+		     (rdev->pdev->device == 0x6665)))
 			new_smc = true;
 		new_chip_name = "hainan";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;

From 8a08403bcb39f5d0e733bcf59a8a74f16b538f6e Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 5 Jan 2017 12:39:01 -0500
Subject: [PATCH 112/297] drm/radeon: drop verde dpm quirks

fixes:
https://bugs.freedesktop.org/show_bug.cgi?id=98897
https://bugs.launchpad.net/bugs/1651981

Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Cc: Adrian Fiergolski <A.Fiergolski@gmail.com>
---
 drivers/gpu/drm/radeon/si_dpm.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 8b5e697f2549..13ba73fd9b68 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3008,19 +3008,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		    (rdev->pdev->device == 0x6817) ||
 		    (rdev->pdev->device == 0x6806))
 			max_mclk = 120000;
-	} else if (rdev->family == CHIP_VERDE) {
-		if ((rdev->pdev->revision == 0x81) ||
-		    (rdev->pdev->revision == 0x83) ||
-		    (rdev->pdev->revision == 0x87) ||
-		    (rdev->pdev->device == 0x6820) ||
-		    (rdev->pdev->device == 0x6821) ||
-		    (rdev->pdev->device == 0x6822) ||
-		    (rdev->pdev->device == 0x6823) ||
-		    (rdev->pdev->device == 0x682A) ||
-		    (rdev->pdev->device == 0x682B)) {
-			max_sclk = 75000;
-			max_mclk = 80000;
-		}
 	} else if (rdev->family == CHIP_OLAND) {
 		if ((rdev->pdev->revision == 0xC7) ||
 		    (rdev->pdev->revision == 0x80) ||

From 7192c54a68013f6058b1bb505645fcd07015191c Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 5 Jan 2017 13:02:37 -0500
Subject: [PATCH 113/297] drm/amdgpu: drop verde dpm quirks

Port of radeon change to amdgpu.

Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/si_dpm.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 0566e9adaf8a..10bedfac27b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -3487,19 +3487,6 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
 		    (adev->pdev->device == 0x6817) ||
 		    (adev->pdev->device == 0x6806))
 			max_mclk = 120000;
-	} else if (adev->asic_type == CHIP_VERDE) {
-		if ((adev->pdev->revision == 0x81) ||
-		    (adev->pdev->revision == 0x83) ||
-		    (adev->pdev->revision == 0x87) ||
-		    (adev->pdev->device == 0x6820) ||
-		    (adev->pdev->device == 0x6821) ||
-		    (adev->pdev->device == 0x6822) ||
-		    (adev->pdev->device == 0x6823) ||
-		    (adev->pdev->device == 0x682A) ||
-		    (adev->pdev->device == 0x682B)) {
-			max_sclk = 75000;
-			max_mclk = 80000;
-		}
 	} else if (adev->asic_type == CHIP_OLAND) {
 		if ((adev->pdev->revision == 0xC7) ||
 		    (adev->pdev->revision == 0x80) ||

From bcd5e1a49f0d54afd3c5411bed2f59996e1c53e4 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 6 Jan 2017 14:26:54 -0500
Subject: [PATCH 114/297] netlabel: add CALIPSO to the list of built-in
 protocols

When we added CALIPSO support in Linux v4.8 we forgot to add it to the
list of supported protocols with display at boot.

Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_kapi.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 28c56b95fb7f..ea7c67050792 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -1502,10 +1502,7 @@ static int __init netlbl_init(void)
 	printk(KERN_INFO "NetLabel: Initializing\n");
 	printk(KERN_INFO "NetLabel:  domain hash size = %u\n",
 	       (1 << NETLBL_DOMHSH_BITSIZE));
-	printk(KERN_INFO "NetLabel:  protocols ="
-	       " UNLABELED"
-	       " CIPSOv4"
-	       "\n");
+	printk(KERN_INFO "NetLabel:  protocols = UNLABELED CIPSOv4 CALIPSO\n");
 
 	ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
 	if (ret_val != 0)

From 1d0f110a2c6c4bca3dbcc4b0e27f1e3dc2d44a2c Mon Sep 17 00:00:00 2001
From: Ivan Vecera <cera@cera.cz>
Date: Fri, 6 Jan 2017 20:30:02 +0100
Subject: [PATCH 115/297] be2net: fix accesses to unicast list

Commit 988d44b "be2net: Avoid redundant addition of mac address in HW"
introduced be_dev_mac_add & be_uc_mac_add helpers that incorrectly
access adapter->uc_list as an array of bytes instead of an array of
be_eth_addr. Consequently NIC is not filled with valid data so unicast
filtering is broken.

Cc: Sathya Perla <sathya.perla@broadcom.com>
Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>
Fixes: 988d44b be2net: Avoid redundant addition of mac address in HW
Signed-off-by: Ivan Vecera <cera@cera.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 225e9a4877d7..3510352866c2 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -275,8 +275,7 @@ static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 
 	/* Check if mac has already been added as part of uc-list */
 	for (i = 0; i < adapter->uc_macs; i++) {
-		if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
-				     mac)) {
+		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 			/* mac already added, skip addition */
 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 			return 0;
@@ -1655,14 +1654,12 @@ static void be_clear_mc_list(struct be_adapter *adapter)
 
 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
 {
-	if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
-			     adapter->dev_mac)) {
+	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
 		return 0;
 	}
 
-	return be_cmd_pmac_add(adapter,
-			       (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
+	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
 			       adapter->if_handle,
 			       &adapter->pmac_id[uc_idx + 1], 0);
 }

From 20b1e22d01a4b0b11d3a1066e9feb04be38607ec Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nicstange@gmail.com>
Date: Thu, 5 Jan 2017 13:51:29 +0100
Subject: [PATCH 116/297] x86/efi: Don't allocate memmap through memblock after
 mm_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the following commit:

  4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data")

...  efi_bgrt_init() calls into the memblock allocator through
efi_mem_reserve() => efi_arch_mem_reserve() *after* mm_init() has been called.

Indeed, KASAN reports a bad read access later on in efi_free_boot_services():

  BUG: KASAN: use-after-free in efi_free_boot_services+0xae/0x24c
            at addr ffff88022de12740
  Read of size 4 by task swapper/0/0
  page:ffffea0008b78480 count:0 mapcount:-127
  mapping:          (null) index:0x1 flags: 0x5fff8000000000()
  [...]
  Call Trace:
   dump_stack+0x68/0x9f
   kasan_report_error+0x4c8/0x500
   kasan_report+0x58/0x60
   __asan_load4+0x61/0x80
   efi_free_boot_services+0xae/0x24c
   start_kernel+0x527/0x562
   x86_64_start_reservations+0x24/0x26
   x86_64_start_kernel+0x157/0x17a
   start_cpu+0x5/0x14

The instruction at the given address is the first read from the memmap's
memory, i.e. the read of md->type in efi_free_boot_services().

Note that the writes earlier in efi_arch_mem_reserve() don't splat because
they're done through early_memremap()ed addresses.

So, after memblock is gone, allocations should be done through the "normal"
page allocator. Introduce a helper, efi_memmap_alloc() for this. Use
it from efi_arch_mem_reserve(), efi_free_boot_services() and, for the sake
of consistency, from efi_fake_memmap() as well.

Note that for the latter, the memmap allocations cease to be page aligned.
This isn't needed though.

Tested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Nicolai Stange <nicstange@gmail.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.9
Cc: Dave Young <dyoung@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mika Penttilä <mika.penttila@nextfour.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data")
Link: http://lkml.kernel.org/r/20170105125130.2815-1-nicstange@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/quirks.c  |  4 ++--
 drivers/firmware/efi/fake_mem.c |  3 +--
 drivers/firmware/efi/memmap.c   | 38 +++++++++++++++++++++++++++++++++
 include/linux/efi.h             |  1 +
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 10aca63a50d7..30031d5293c4 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -214,7 +214,7 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
 
 	new_size = efi.memmap.desc_size * num_entries;
 
-	new_phys = memblock_alloc(new_size, 0);
+	new_phys = efi_memmap_alloc(num_entries);
 	if (!new_phys) {
 		pr_err("Could not allocate boot services memmap\n");
 		return;
@@ -355,7 +355,7 @@ void __init efi_free_boot_services(void)
 	}
 
 	new_size = efi.memmap.desc_size * num_entries;
-	new_phys = memblock_alloc(new_size, 0);
+	new_phys = efi_memmap_alloc(num_entries);
 	if (!new_phys) {
 		pr_err("Failed to allocate new EFI memmap\n");
 		return;
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index 520a40e5e0e4..6c7d60c239b5 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -71,8 +71,7 @@ void __init efi_fake_memmap(void)
 	}
 
 	/* allocate memory for new EFI memmap */
-	new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map,
-					PAGE_SIZE);
+	new_memmap_phy = efi_memmap_alloc(new_nr_map);
 	if (!new_memmap_phy)
 		return;
 
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index f03ddecd232b..78686443cb37 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -9,6 +9,44 @@
 #include <linux/efi.h>
 #include <linux/io.h>
 #include <asm/early_ioremap.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+
+static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size)
+{
+	return memblock_alloc(size, 0);
+}
+
+static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
+{
+	unsigned int order = get_order(size);
+	struct page *p = alloc_pages(GFP_KERNEL, order);
+
+	if (!p)
+		return 0;
+
+	return PFN_PHYS(page_to_pfn(p));
+}
+
+/**
+ * efi_memmap_alloc - Allocate memory for the EFI memory map
+ * @num_entries: Number of entries in the allocated map.
+ *
+ * Depending on whether mm_init() has already been invoked or not,
+ * either memblock or "normal" page allocation is used.
+ *
+ * Returns the physical address of the allocated memory map on
+ * success, zero on failure.
+ */
+phys_addr_t __init efi_memmap_alloc(unsigned int num_entries)
+{
+	unsigned long size = num_entries * efi.memmap.desc_size;
+
+	if (slab_is_available())
+		return __efi_memmap_alloc_late(size);
+
+	return __efi_memmap_alloc_early(size);
+}
 
 /**
  * __efi_memmap_init - Common code for mapping the EFI memory map
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a07a476178cd..0c5420208c40 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -950,6 +950,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes,
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 
+extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries);
 extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
 extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
 extern void __init efi_memmap_unmap(void);

From 6052cd1af86f9833b6b0b60d5d4787c4a06d65ea Mon Sep 17 00:00:00 2001
From: Ivan Vecera <cera@cera.cz>
Date: Fri, 6 Jan 2017 21:59:30 +0100
Subject: [PATCH 117/297] be2net: fix unicast list filling

The adapter->pmac_id[0] item is used for primary MAC address but
this is not true for adapter->uc_list[0] as is assumed in
be_set_uc_list(). There are N UC addresses copied first from net_device
to adapter->uc_list[1..N] and then N UC addresses from
adapter->uc_list[0..N-1] are sent to HW. So the last UC address is never
stored into HW and address 00:00:00:00;00:00 (from uc_list[0]) is used
instead.

Cc: Sathya Perla <sathya.perla@broadcom.com>
Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>
Fixes: b717241 be2net: replace polling with sleeping in the FW completion path
Signed-off-by: Ivan Vecera <cera@cera.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3510352866c2..ec010ced6c99 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1695,9 +1695,8 @@ static void be_set_uc_list(struct be_adapter *adapter)
 	}
 
 	if (adapter->update_uc_list) {
-		i = 1; /* First slot is claimed by the Primary MAC */
-
 		/* cache the uc-list in adapter array */
+		i = 0;
 		netdev_for_each_uc_addr(ha, netdev) {
 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
 			i++;

From f5992b72ebe0dde488fa8f706b887194020c66fc Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 6 Jan 2017 16:18:53 -0500
Subject: [PATCH 118/297] tg3: Fix race condition in tg3_get_stats64().

The driver's ndo_get_stats64() method is not always called under RTNL.
So it can race with driver close or ethtool reconfigurations.  Fix the
race condition by taking tp->lock spinlock in tg3_free_consistent()
when freeing the tp->hw_stats memory block.  tg3_get_stats64() is
already taking tp->lock.

Reported-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 185e9e047aa9..ae42de4fdddf 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -8720,11 +8720,14 @@ static void tg3_free_consistent(struct tg3 *tp)
 	tg3_mem_rx_release(tp);
 	tg3_mem_tx_release(tp);
 
+	/* Protect tg3_get_stats64() from reading freed tp->hw_stats. */
+	tg3_full_lock(tp, 0);
 	if (tp->hw_stats) {
 		dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats),
 				  tp->hw_stats, tp->stats_mapping);
 		tp->hw_stats = NULL;
 	}
+	tg3_full_unlock(tp);
 }
 
 /*

From 9d5ecb09d525469abd1a10c096cb5a17206523f2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 7 Jan 2017 00:26:33 +0100
Subject: [PATCH 119/297] bpf: change back to orig prog on too many passes

If after too many passes still no image could be emitted, then
swap back to the original program as we do in all other cases
and don't use the one with blinding.

Fixes: 959a75791603 ("bpf, x86: add support for constant blinding")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e76d1af60f7a..bb660e53cbd6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1172,6 +1172,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		set_memory_ro((unsigned long)header, header->pages);
 		prog->bpf_func = (void *)image;
 		prog->jited = 1;
+	} else {
+		prog = orig_prog;
 	}
 
 out_addrs:

From a2cd64f30140c5aebd9359f66c00c19d5c6bece6 Mon Sep 17 00:00:00 2001
From: "Kweh, Hock Leong" <hock.leong.kweh@intel.com>
Date: Sat, 7 Jan 2017 17:32:03 +0800
Subject: [PATCH 120/297] net: stmmac: fix maxmtu assignment to be within valid
 range

There is no checking valid value of maxmtu when getting it from
device tree. This resolution added the checking condition to
ensure the assignment is made within a valid range.

Signed-off-by: Kweh, Hock Leong <hock.leong.kweh@intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 +++++++++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c  |  6 ++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 39eb7a65bb9f..a276a32d57f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3319,8 +3319,16 @@ int stmmac_dvr_probe(struct device *device,
 		ndev->max_mtu = JUMBO_LEN;
 	else
 		ndev->max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN);
-	if (priv->plat->maxmtu < ndev->max_mtu)
+	/* Will not overwrite ndev->max_mtu if plat->maxmtu > ndev->max_mtu
+	 * as well as plat->maxmtu < ndev->min_mtu which is a invalid range.
+	 */
+	if ((priv->plat->maxmtu < ndev->max_mtu) &&
+	    (priv->plat->maxmtu >= ndev->min_mtu))
 		ndev->max_mtu = priv->plat->maxmtu;
+	else if (priv->plat->maxmtu < ndev->min_mtu)
+		netdev_warn(priv->dev,
+			    "%s: warning: maxmtu having invalid value (%d)\n",
+			    __func__, priv->plat->maxmtu);
 
 	if (flow_ctrl)
 		priv->flow_ctrl = FLOW_AUTO;	/* RX/TX pause on */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index a2831773431a..3da4737620cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -89,6 +89,9 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
 
 	/* Set default value for unicast filter entries */
 	plat->unicast_filter_entries = 1;
+
+	/* Set the maxmtu to a default of JUMBO_LEN */
+	plat->maxmtu = JUMBO_LEN;
 }
 
 static int quark_default_data(struct plat_stmmacenet_data *plat,
@@ -126,6 +129,9 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	/* Set default value for unicast filter entries */
 	plat->unicast_filter_entries = 1;
 
+	/* Set the maxmtu to a default of JUMBO_LEN */
+	plat->maxmtu = JUMBO_LEN;
+
 	return 0;
 }
 

From a4c61b92b3a4cbda35bb0251a5063a68f0861b2c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 7 Jan 2017 21:01:56 -0800
Subject: [PATCH 121/297] net: dsa: bcm_sf2: Do not clobber b53_switch_ops

We make the bcm_sf2 driver override ds->ops which points to
b53_switch_ops since b53_switch_alloc() did the assignent. This is all
well and good until a second b53 switch comes in, and ends up using the
bcm_sf2 operations. Make a proper local copy, substitute the ds->ops
pointer and then override the operations.

Fixes: f458995b9ad8 ("net: dsa: bcm_sf2: Utilize core B53 driver when possible")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 9ec33b51a0ed..2f9f910c0e40 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -982,6 +982,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 	const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME;
 	struct device_node *dn = pdev->dev.of_node;
 	struct b53_platform_data *pdata;
+	struct dsa_switch_ops *ops;
 	struct bcm_sf2_priv *priv;
 	struct b53_device *dev;
 	struct dsa_switch *ds;
@@ -995,6 +996,10 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
+	ops = devm_kzalloc(&pdev->dev, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
 	dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv);
 	if (!dev)
 		return -ENOMEM;
@@ -1014,6 +1019,8 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 	ds = dev->ds;
 
 	/* Override the parts that are non-standard wrt. normal b53 devices */
+	memcpy(ops, ds->ops, sizeof(*ops));
+	ds->ops = ops;
 	ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol;
 	ds->ops->setup = bcm_sf2_sw_setup;
 	ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags;

From 2cfe8f8290bd28cf1ee67db914a6e76cf8e6437b Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 7 Jan 2017 21:01:57 -0800
Subject: [PATCH 122/297] net: dsa: bcm_sf2: Utilize nested MDIO read/write

We are implementing a MDIO bus which is behind another one, so use the
nested version of the accessors to get lockdep annotations correct.

Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 2f9f910c0e40..2ce7ae97ac91 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -393,7 +393,7 @@ static int bcm_sf2_sw_mdio_read(struct mii_bus *bus, int addr, int regnum)
 	if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr))
 		return bcm_sf2_sw_indir_rw(priv, 1, addr, regnum, 0);
 	else
-		return mdiobus_read(priv->master_mii_bus, addr, regnum);
+		return mdiobus_read_nested(priv->master_mii_bus, addr, regnum);
 }
 
 static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum,
@@ -407,7 +407,7 @@ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum,
 	if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr))
 		bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val);
 	else
-		mdiobus_write(priv->master_mii_bus, addr, regnum, val);
+		mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val);
 
 	return 0;
 }

From ac0c7cf8be00f269f82964cf7b144ca3edc5dbc4 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 6 Jan 2017 14:12:51 +0100
Subject: [PATCH 123/297] btrfs: fix crash when tracepoint arguments are freed
 by wq callbacks

Enabling btrfs tracepoints leads to instant crash, as reported. The wq
callbacks could free the memory and the tracepoints started to
dereference the members to get to fs_info.

The proposed fix https://marc.info/?l=linux-btrfs&m=148172436722606&w=2
removed the tracepoints but we could preserve them by passing only the
required data in a safe way.

Fixes: bc074524e123 ("btrfs: prefix fsid to all trace events")
CC: stable@vger.kernel.org # 4.8+
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/async-thread.c      | 15 +++++++++++----
 include/trace/events/btrfs.h | 22 +++++++++++++---------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 63d197724519..ff0b0be92d61 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -273,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 	unsigned long flags;
 
 	while (1) {
+		void *wtag;
+
 		spin_lock_irqsave(lock, flags);
 		if (list_empty(list))
 			break;
@@ -299,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 		spin_unlock_irqrestore(lock, flags);
 
 		/*
-		 * we don't want to call the ordered free functions
-		 * with the lock held though
+		 * We don't want to call the ordered free functions with the
+		 * lock held though. Save the work as tag for the trace event,
+		 * because the callback could free the structure.
 		 */
+		wtag = work;
 		work->ordered_free(work);
-		trace_btrfs_all_work_done(work);
+		trace_btrfs_all_work_done(wq->fs_info, wtag);
 	}
 	spin_unlock_irqrestore(lock, flags);
 }
@@ -311,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 static void normal_work_helper(struct btrfs_work *work)
 {
 	struct __btrfs_workqueue *wq;
+	void *wtag;
 	int need_order = 0;
 
 	/*
@@ -324,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work)
 	if (work->ordered_func)
 		need_order = 1;
 	wq = work->wq;
+	/* Safe for tracepoints in case work gets freed by the callback */
+	wtag = work;
 
 	trace_btrfs_work_sched(work);
 	thresh_exec_hook(wq);
@@ -333,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work)
 		run_ordered_work(wq);
 	}
 	if (!need_order)
-		trace_btrfs_all_work_done(work);
+		trace_btrfs_all_work_done(wq->fs_info, wtag);
 }
 
 void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index c14bed4ab097..b09225c77676 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1157,22 +1157,26 @@ DECLARE_EVENT_CLASS(btrfs__work,
 		   __entry->func, __entry->ordered_func, __entry->ordered_free)
 );
 
-/* For situiations that the work is freed */
+/*
+ * For situiations when the work is freed, we pass fs_info and a tag that that
+ * matches address of the work structure so it can be paired with the
+ * scheduling event.
+ */
 DECLARE_EVENT_CLASS(btrfs__work__done,
 
-	TP_PROTO(struct btrfs_work *work),
+	TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag),
 
-	TP_ARGS(work),
+	TP_ARGS(fs_info, wtag),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	void *,	work			)
+		__field(	void *,	wtag			)
 	),
 
-	TP_fast_assign_btrfs(btrfs_work_owner(work),
-		__entry->work		= work;
+	TP_fast_assign_btrfs(fs_info,
+		__entry->wtag		= wtag;
 	),
 
-	TP_printk_btrfs("work->%p", __entry->work)
+	TP_printk_btrfs("work->%p", __entry->wtag)
 );
 
 DEFINE_EVENT(btrfs__work, btrfs_work_queued,
@@ -1191,9 +1195,9 @@ DEFINE_EVENT(btrfs__work, btrfs_work_sched,
 
 DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done,
 
-	TP_PROTO(struct btrfs_work *work),
+	TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag),
 
-	TP_ARGS(work)
+	TP_ARGS(fs_info, wtag)
 );
 
 DEFINE_EVENT(btrfs__work, btrfs_ordered_sched,

From 92a1bf76a89ad338f00eb9a2c7689a3907fbcaad Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 17 Nov 2016 15:00:50 -0800
Subject: [PATCH 124/297] Btrfs: add 'inode' for extent map tracepoint

'inode' is an important field for btrfs_get_extent, lets trace it.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c             |  2 +-
 include/trace/events/btrfs.h | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a713d9d324b0..fab189c67eff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7059,7 +7059,7 @@ insert:
 	write_unlock(&em_tree->lock);
 out:
 
-	trace_btrfs_get_extent(root, em);
+	trace_btrfs_get_extent(root, inode, em);
 
 	btrfs_free_path(path);
 	if (trans) {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index b09225c77676..3048f5205363 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -184,14 +184,16 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
 
 TRACE_EVENT_CONDITION(btrfs_get_extent,
 
-	TP_PROTO(struct btrfs_root *root, struct extent_map *map),
+	TP_PROTO(struct btrfs_root *root, struct inode *inode,
+		 struct extent_map *map),
 
-	TP_ARGS(root, map),
+	TP_ARGS(root, inode, map),
 
 	TP_CONDITION(map),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,  root_objectid	)
+		__field(	u64,  ino		)
 		__field(	u64,  start		)
 		__field(	u64,  len		)
 		__field(	u64,  orig_start	)
@@ -204,7 +206,8 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 
 	TP_fast_assign_btrfs(root->fs_info,
 		__entry->root_objectid	= root->root_key.objectid;
-		__entry->start 		= map->start;
+		__entry->ino		= btrfs_ino(inode);
+		__entry->start		= map->start;
 		__entry->len		= map->len;
 		__entry->orig_start	= map->orig_start;
 		__entry->block_start	= map->block_start;
@@ -214,11 +217,12 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->compress_type	= map->compress_type;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu, "
+	TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, "
 		  "orig_start = %llu, block_start = %llu(%s), "
 		  "block_len = %llu, flags = %s, refs = %u, "
 		  "compress_type = %u",
 		  show_root_type(__entry->root_objectid),
+		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len,
 		  (unsigned long long)__entry->orig_start,

From 7856654842bdbebc0fbcbf51573da5d70a787aba Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 30 Nov 2016 16:10:10 -0800
Subject: [PATCH 125/297] Btrfs: add truncated_len for ordered extent
 tracepoints

This can help us monitor truncated ordered extents.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 3048f5205363..2026a89786b0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -263,6 +263,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__field(	int,  compress_type	)
 		__field(	int,  refs		)
 		__field(	u64,  root_objectid	)
+		__field(	u64,  truncated_len	)
 	),
 
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
@@ -277,10 +278,12 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->refs		= atomic_read(&ordered->refs);
 		__entry->root_objectid	=
 				BTRFS_I(inode)->root->root_key.objectid;
+		__entry->truncated_len	= ordered->truncated_len;
 	),
 
 	TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, "
 		  "start = %llu, len = %llu, disk_len = %llu, "
+		  "truncated_len = %llu, "
 		  "bytes_left = %llu, flags = %s, compress_type = %d, "
 		  "refs = %d",
 		  show_root_type(__entry->root_objectid),
@@ -289,6 +292,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len,
 		  (unsigned long long)__entry->disk_len,
+		  (unsigned long long)__entry->truncated_len,
 		  (unsigned long long)__entry->bytes_left,
 		  show_ordered_flags(__entry->flags),
 		  __entry->compress_type, __entry->refs)

From 562a7a07bf61e2949f7cbdb6ac7537ad9e2794d1 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 6 Jan 2017 15:51:36 +0100
Subject: [PATCH 126/297] btrfs: make tracepoint format strings more compact

We've recently added the fsid to trace events, this makes the line quite
long. To reduce the it again, remove extra spaces around = and remove
",".

Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 112 +++++++++++++++++------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 2026a89786b0..88d18a8ceb59 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -130,8 +130,8 @@ DECLARE_EVENT_CLASS(btrfs__inode,
 				BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, "
-		  "disk_i_size = %llu, last_trans = %llu, logged_trans = %llu",
+	TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%lu blocks=%llu "
+		  "disk_i_size=%llu last_trans=%llu logged_trans=%llu",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->generation,
 		  (unsigned long)__entry->ino,
@@ -217,10 +217,10 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->compress_type	= map->compress_type;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, "
-		  "orig_start = %llu, block_start = %llu(%s), "
-		  "block_len = %llu, flags = %s, refs = %u, "
-		  "compress_type = %u",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu "
+		  "orig_start=%llu block_start=%llu(%s) "
+		  "block_len=%llu flags=%s refs=%u "
+		  "compress_type=%u",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->start,
@@ -281,11 +281,11 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->truncated_len	= ordered->truncated_len;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, "
-		  "start = %llu, len = %llu, disk_len = %llu, "
-		  "truncated_len = %llu, "
-		  "bytes_left = %llu, flags = %s, compress_type = %d, "
-		  "refs = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu file_offset=%llu "
+		  "start=%llu len=%llu disk_len=%llu "
+		  "truncated_len=%llu "
+		  "bytes_left=%llu flags=%s compress_type=%d "
+		  "refs=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->file_offset,
@@ -362,10 +362,10 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, "
-		  "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, "
-		  "range_end = %llu, for_kupdate = %d, "
-		  "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu",
+	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu "
+		  "nr_to_write=%ld pages_skipped=%ld range_start=%llu "
+		  "range_end=%llu for_kupdate=%d "
+		  "for_reclaim=%d range_cyclic=%d writeback_index=%lu",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, __entry->index,
 		  __entry->nr_to_write, __entry->pages_skipped,
@@ -408,8 +408,8 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
 			 BTRFS_I(page->mapping->host)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, "
-		  "end = %llu, uptodate = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu start=%llu "
+		  "end=%llu uptodate=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, (unsigned long)__entry->index,
 		  (unsigned long long)__entry->start,
@@ -441,7 +441,7 @@ TRACE_EVENT(btrfs_sync_file,
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%ld parent=%ld datasync=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, (unsigned long)__entry->parent,
 		  __entry->datasync)
@@ -492,9 +492,9 @@ TRACE_EVENT(btrfs_add_block_group,
 		__entry->create		= create;
 	),
 
-	TP_printk("%pU: block_group offset = %llu, size = %llu, "
-		  "flags = %llu(%s), bytes_used = %llu, bytes_super = %llu, "
-		  "create = %d", __entry->fsid,
+	TP_printk("%pU: block_group offset=%llu size=%llu "
+		  "flags=%llu(%s) bytes_used=%llu bytes_super=%llu "
+		  "create=%d", __entry->fsid,
 		  (unsigned long long)__entry->offset,
 		  (unsigned long long)__entry->size,
 		  (unsigned long long)__entry->flags,
@@ -543,9 +543,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref,
 		__entry->seq		= ref->seq;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
-		  "parent = %llu(%s), ref_root = %llu(%s), level = %d, "
-		  "type = %s, seq = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
+		  "parent=%llu(%s) ref_root=%llu(%s) level=%d "
+		  "type=%s seq=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -608,9 +608,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_data_ref,
 		__entry->seq		= ref->seq;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
-		  "parent = %llu(%s), ref_root = %llu(%s), owner = %llu, "
-		  "offset = %llu, type = %s, seq = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
+		  "parent=%llu(%s) ref_root=%llu(%s) owner=%llu "
+		  "offset=%llu type=%s seq=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -665,7 +665,7 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 		__entry->is_data	= head_ref->is_data;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s is_data=%d",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -729,8 +729,8 @@ DECLARE_EVENT_CLASS(btrfs__chunk,
 		__entry->root_objectid	= fs_info->chunk_root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), offset = %llu, size = %llu, "
-		  "num_stripes = %d, sub_stripes = %d, type = %s",
+	TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu "
+		  "num_stripes=%d sub_stripes=%d type=%s",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->offset,
 		  (unsigned long long)__entry->size,
@@ -779,8 +779,8 @@ TRACE_EVENT(btrfs_cow_block,
 		__entry->cow_level	= btrfs_header_level(cow);
 	),
 
-	TP_printk_btrfs("root = %llu(%s), refs = %d, orig_buf = %llu "
-		  "(orig_level = %d), cow_buf = %llu (cow_level = %d)",
+	TP_printk_btrfs("root=%llu(%s) refs=%d orig_buf=%llu "
+		  "(orig_level=%d) cow_buf=%llu (cow_level=%d)",
 		  show_root_type(__entry->root_objectid),
 		  __entry->refs,
 		  (unsigned long long)__entry->buf_start,
@@ -844,7 +844,7 @@ TRACE_EVENT(btrfs_trigger_flush,
 		__assign_str(reason, reason)
 	),
 
-	TP_printk("%pU: %s: flush = %d(%s), flags = %llu(%s), bytes = %llu",
+	TP_printk("%pU: %s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
 		  __entry->fsid, __get_str(reason), __entry->flush,
 		  show_flush_action(__entry->flush),
 		  (unsigned long long)__entry->flags,
@@ -887,8 +887,8 @@ TRACE_EVENT(btrfs_flush_space,
 		__entry->ret		=	ret;
 	),
 
-	TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
-		  "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
+	TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu "
+		  "orig_bytes=%llu ret=%d", __entry->fsid, __entry->state,
 		  show_flush_state(__entry->state),
 		  (unsigned long long)__entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
@@ -913,7 +913,7 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent,
 		__entry->len		= len;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu",
+	TP_printk_btrfs("root=%llu(%s) start=%llu len=%llu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len)
@@ -952,7 +952,7 @@ TRACE_EVENT(find_free_extent,
 		__entry->data		= data;
 	),
 
-	TP_printk_btrfs("root = %Lu(%s), len = %Lu, empty_size = %Lu, flags = %Lu(%s)",
+	TP_printk_btrfs("root=%Lu(%s) len=%Lu empty_size=%Lu flags=%Lu(%s)",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->num_bytes, __entry->empty_size, __entry->data,
 		  __print_flags((unsigned long)__entry->data, "|",
@@ -981,8 +981,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
 		__entry->len		= len;
 	),
 
-	TP_printk_btrfs("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
-		  "start = %Lu, len = %Lu",
+	TP_printk_btrfs("root=%Lu(%s) block_group=%Lu flags=%Lu(%s) "
+		  "start=%Lu len=%Lu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->bg_objectid,
 		  __entry->flags, __print_flags((unsigned long)__entry->flags,
@@ -1033,8 +1033,8 @@ TRACE_EVENT(btrfs_find_cluster,
 		__entry->min_bytes	= min_bytes;
 	),
 
-	TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu,"
-		  " empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid,
+	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) start=%Lu len=%Lu "
+		  "empty_size=%Lu min_bytes=%Lu", __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS), __entry->start,
@@ -1055,7 +1055,7 @@ TRACE_EVENT(btrfs_failed_cluster_setup,
 		__entry->bg_objectid	= block_group->key.objectid;
 	),
 
-	TP_printk_btrfs("block_group = %Lu", __entry->bg_objectid)
+	TP_printk_btrfs("block_group=%Lu", __entry->bg_objectid)
 );
 
 TRACE_EVENT(btrfs_setup_cluster,
@@ -1083,8 +1083,8 @@ TRACE_EVENT(btrfs_setup_cluster,
 		__entry->bitmap		= bitmap;
 	),
 
-	TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, "
-		  "size = %Lu, max_size = %Lu, bitmap = %d",
+	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) window_start=%Lu "
+		  "size=%Lu max_size=%Lu bitmap=%d",
 		  __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
@@ -1111,7 +1111,7 @@ TRACE_EVENT(alloc_extent_state,
 		__entry->ip	= IP
 	),
 
-	TP_printk("state=%p; mask = %s; caller = %pS", __entry->state,
+	TP_printk("state=%p mask=%s caller=%pS", __entry->state,
 		  show_gfp_flags(__entry->mask), (void *)__entry->ip)
 );
 
@@ -1131,7 +1131,7 @@ TRACE_EVENT(free_extent_state,
 		__entry->ip = IP
 	),
 
-	TP_printk(" state=%p; caller = %pS", __entry->state,
+	TP_printk("state=%p caller=%pS", __entry->state,
 		  (void *)__entry->ip)
 );
 
@@ -1159,8 +1159,8 @@ DECLARE_EVENT_CLASS(btrfs__work,
 		__entry->normal_work	= &work->normal_work;
 	),
 
-	TP_printk_btrfs("work=%p (normal_work=%p), wq=%p, func=%pf, ordered_func=%p,"
-		  " ordered_free=%p",
+	TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%pf ordered_func=%p "
+		  "ordered_free=%p",
 		  __entry->work, __entry->normal_work, __entry->wq,
 		   __entry->func, __entry->ordered_func, __entry->ordered_free)
 );
@@ -1233,7 +1233,7 @@ DECLARE_EVENT_CLASS(btrfs__workqueue,
 		__entry->high		= high;
 	),
 
-	TP_printk_btrfs("name=%s%s, wq=%p", __get_str(name),
+	TP_printk_btrfs("name=%s%s wq=%p", __get_str(name),
 		  __print_flags(__entry->high, "",
 				{(WQ_HIGHPRI),	"-high"}),
 		  __entry->wq)
@@ -1288,7 +1288,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_data_map,
 		__entry->free_reserved	=	free_reserved;
 	),
 
-	TP_printk_btrfs("rootid=%llu, ino=%lu, free_reserved=%llu",
+	TP_printk_btrfs("rootid=%llu ino=%lu free_reserved=%llu",
 		  __entry->rootid, __entry->ino, __entry->free_reserved)
 );
 
@@ -1335,7 +1335,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data,
 		__entry->op		= op;
 	),
 
-	TP_printk_btrfs("root=%llu, ino=%lu, start=%llu, len=%llu, reserved=%llu, op=%s",
+	TP_printk_btrfs("root=%llu ino=%lu start=%llu len=%llu reserved=%llu op=%s",
 		  __entry->rootid, __entry->ino, __entry->start, __entry->len,
 		  __entry->reserved,
 		  __print_flags((unsigned long)__entry->op, "",
@@ -1373,7 +1373,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref,
 		__entry->reserved	= reserved;
 	),
 
-	TP_printk_btrfs("root=%llu, reserved=%llu, op=free",
+	TP_printk_btrfs("root=%llu reserved=%llu op=free",
 		  __entry->ref_root, __entry->reserved)
 );
 
@@ -1400,7 +1400,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 		__entry->num_bytes	= rec->num_bytes;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes)
 );
@@ -1442,8 +1442,8 @@ TRACE_EVENT(btrfs_qgroup_account_extent,
 		__entry->nr_new_roots	= nr_new_roots;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, "
-		  "nr_new_roots = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu nr_old_roots=%llu "
+		  "nr_new_roots=%llu",
 		  __entry->bytenr,
 		  __entry->num_bytes,
 		  __entry->nr_old_roots,
@@ -1469,7 +1469,7 @@ TRACE_EVENT(qgroup_update_counters,
 		__entry->cur_new_count	= cur_new_count;
 	),
 
-	TP_printk_btrfs("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu",
+	TP_printk_btrfs("qgid=%llu cur_old_count=%llu cur_new_count=%llu",
 		  __entry->qgid,
 		  __entry->cur_old_count,
 		  __entry->cur_new_count)

From fac69d0efad08fc15e4dbfc116830782acc0dc9a Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Sat, 7 Jan 2017 10:38:31 +0100
Subject: [PATCH 127/297] x86/boot: Add missing declaration of string functions

Add the missing declarations of basic string functions to string.h to allow
a clean build.

Fixes: 5be865661516 ("String-handling functions for the new x86 setup code.")
Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Link: http://lkml.kernel.org/r/1483781911-21399-1-git-send-email-hofrat@osadl.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/string.c | 1 +
 arch/x86/boot/string.h | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index cc3bd583dce1..9e240fcba784 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include "ctype.h"
+#include "string.h"
 
 int memcmp(const void *s1, const void *s2, size_t len)
 {
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 725e820602b1..113588ddb43f 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len);
 #define memset(d,c,l) __builtin_memset(d,c,l)
 #define memcmp	__builtin_memcmp
 
+extern int strcmp(const char *str1, const char *str2);
+extern int strncmp(const char *cs, const char *ct, size_t count);
+extern size_t strlen(const char *s);
+extern char *strstr(const char *s1, const char *s2);
+extern size_t strnlen(const char *s, size_t maxlen);
+extern unsigned int atou(const char *s);
+extern unsigned long long simple_strtoull(const char *cp, char **endp,
+					  unsigned int base);
+
 #endif /* BOOT_STRING_H */

From 02c5c03283c52157d336abf5e44ffcda10579fbf Mon Sep 17 00:00:00 2001
From: Bard Liao <bardliao@realtek.com>
Date: Tue, 27 Dec 2016 12:05:05 +0800
Subject: [PATCH 128/297] ASoC: rt5645: set sel_i2s_pre_div1 to 2

The i2s clock pre-divider 1 is used for both i2s1 and sysclk.
The i2s1 is usually used for the main i2s and the pre-divider
will be set in hw_params function.

However, if i2s2 is used, the pre-divider is not set in the hw_params
function and the default value of i2s clock pre-divider 1 is too high
for sysclk and DMIC usage. Fix by overriding default divider value to 2.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=95681
Tested-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bard Liao <bardliao@realtek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/rt5645.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 10c2a564a715..1ac96ef9ee20 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3833,6 +3833,9 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
 		}
 	}
 
+	regmap_update_bits(rt5645->regmap, RT5645_ADDA_CLK1,
+		RT5645_I2S_PD1_MASK, RT5645_I2S_PD1_2);
+
 	if (rt5645->pdata.jd_invert) {
 		regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2,
 			RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV);

From 4e2da44691cffbfffb1535f478d19bc2dca3e62b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:10 +0100
Subject: [PATCH 129/297] USB: serial: ch341: fix initial modem-control state

DTR and RTS will be asserted by the tty-layer when the port is opened
and deasserted on close (if HUPCL is set). Make sure the initial state
is not-asserted before the port is first opened as well.

Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 2597b83a8ae2..d133e72fe888 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -258,7 +258,6 @@ static int ch341_port_probe(struct usb_serial_port *port)
 
 	spin_lock_init(&priv->lock);
 	priv->baud_rate = DEFAULT_BAUD_RATE;
-	priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
 
 	r = ch341_configure(port->serial->dev, priv);
 	if (r < 0)

From a20047f36e2f6a1eea4f1fd261aaa55882369868 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:11 +0100
Subject: [PATCH 130/297] USB: serial: ch341: fix open and resume after B0

The private baud_rate variable is used to configure the port at open and
reset-resume and must never be set to (and left at) zero or reset-resume
and all further open attempts will fail.

Fixes: aa91def41a7b ("USB: ch341: set tty baud speed according to tty
struct")
Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index d133e72fe888..6279df905c14 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -355,7 +355,6 @@ static void ch341_set_termios(struct tty_struct *tty,
 
 	baud_rate = tty_get_baud_rate(tty);
 
-	priv->baud_rate = baud_rate;
 	ctrl = CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX;
 
 	switch (C_CSIZE(tty)) {
@@ -388,6 +387,9 @@ static void ch341_set_termios(struct tty_struct *tty,
 		spin_lock_irqsave(&priv->lock, flags);
 		priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS);
 		spin_unlock_irqrestore(&priv->lock, flags);
+
+		priv->baud_rate = baud_rate;
+
 		r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl);
 		if (r < 0 && old_termios) {
 			priv->baud_rate = tty_termios_baud_rate(old_termios);

From 030ee7ae52a46a2be52ccc8242c4a330aba8d38e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:12 +0100
Subject: [PATCH 131/297] USB: serial: ch341: fix modem-control and B0 handling

The modem-control signals are managed by the tty-layer during open and
should not be asserted prematurely when set_termios is called from
driver open.

Also make sure that the signals are asserted only when changing speed
from B0.

Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 6279df905c14..0cc5056b304d 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -384,10 +384,6 @@ static void ch341_set_termios(struct tty_struct *tty,
 		ctrl |= CH341_LCR_STOP_BITS_2;
 
 	if (baud_rate) {
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS);
-		spin_unlock_irqrestore(&priv->lock, flags);
-
 		priv->baud_rate = baud_rate;
 
 		r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl);
@@ -395,14 +391,16 @@ static void ch341_set_termios(struct tty_struct *tty,
 			priv->baud_rate = tty_termios_baud_rate(old_termios);
 			tty_termios_copy_hw(&tty->termios, old_termios);
 		}
-	} else {
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS);
-		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
-	ch341_set_handshake(port->serial->dev, priv->line_control);
+	spin_lock_irqsave(&priv->lock, flags);
+	if (C_BAUD(tty) == B0)
+		priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS);
+	else if (old_termios && (old_termios->c_cflag & CBAUD) == B0)
+		priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS);
+	spin_unlock_irqrestore(&priv->lock, flags);
 
+	ch341_set_handshake(port->serial->dev, priv->line_control);
 }
 
 static void ch341_break_ctl(struct tty_struct *tty, int break_state)

From f2950b78547ffb8475297ada6b92bc2d774d5461 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:13 +0100
Subject: [PATCH 132/297] USB: serial: ch341: fix open error handling

Make sure to stop the interrupt URB before returning on errors during
open.

Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 0cc5056b304d..8f41d4385f1c 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -319,7 +319,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port)
 
 	r = ch341_configure(serial->dev, priv);
 	if (r)
-		goto out;
+		return r;
 
 	if (tty)
 		ch341_set_termios(tty, port, NULL);
@@ -329,12 +329,19 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port)
 	if (r) {
 		dev_err(&port->dev, "%s - failed to submit interrupt urb: %d\n",
 			__func__, r);
-		goto out;
+		return r;
 	}
 
 	r = usb_serial_generic_open(tty, port);
+	if (r)
+		goto err_kill_interrupt_urb;
 
-out:	return r;
+	return 0;
+
+err_kill_interrupt_urb:
+	usb_kill_urb(port->interrupt_in_urb);
+
+	return r;
 }
 
 /* Old_termios contains the original termios settings and

From ce5e292828117d1b71cbd3edf9e9137cf31acd30 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:14 +0100
Subject: [PATCH 133/297] USB: serial: ch341: fix resume after reset

Fix reset-resume handling which failed to resubmit the read and
interrupt URBs, thereby leaving a port that was open before suspend in a
broken state until closed and reopened.

Fixes: 1ded7ea47b88 ("USB: ch341 serial: fix port number changed after
resume")
Fixes: 2bfd1c96a9fb ("USB: serial: ch341: remove reset_resume callback")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 8f41d4385f1c..5343d65f3b52 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -582,14 +582,23 @@ static int ch341_tiocmget(struct tty_struct *tty)
 
 static int ch341_reset_resume(struct usb_serial *serial)
 {
-	struct ch341_private *priv;
-
-	priv = usb_get_serial_port_data(serial->port[0]);
+	struct usb_serial_port *port = serial->port[0];
+	struct ch341_private *priv = usb_get_serial_port_data(port);
+	int ret;
 
 	/* reconfigure ch341 serial port after bus-reset */
 	ch341_configure(serial->dev, priv);
 
-	return 0;
+	if (tty_port_initialized(&port->port)) {
+		ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO);
+		if (ret) {
+			dev_err(&port->dev, "failed to submit interrupt urb: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return usb_serial_generic_resume(serial);
 }
 
 static struct usb_serial_driver ch341_device = {

From 3cca8624b6624e7ffb87dcd8a0a05bef9b50e97b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:15 +0100
Subject: [PATCH 134/297] USB: serial: ch341: fix line settings after
 reset-resume

A recent change added support for modifying the default line-control
settings, but did not make sure that the modified settings were used as
part of reconfiguration after a device has been reset during resume.

This caused a port that was open before suspend to be unusable until
being closed and reopened.

Fixes: ba781bdf8662 ("USB: serial: ch341: add support for parity, frame
length, stop bits")
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 5343d65f3b52..eabdd05a2147 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -95,6 +95,7 @@ struct ch341_private {
 	unsigned baud_rate; /* set baud rate */
 	u8 line_control; /* set line control value RTS/DTR */
 	u8 line_status; /* active status of modem control inputs */
+	u8 lcr;
 };
 
 static void ch341_set_termios(struct tty_struct *tty,
@@ -232,7 +233,7 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv)
 	if (r < 0)
 		goto out;
 
-	r = ch341_init_set_baudrate(dev, priv, 0);
+	r = ch341_init_set_baudrate(dev, priv, priv->lcr);
 	if (r < 0)
 		goto out;
 
@@ -397,6 +398,8 @@ static void ch341_set_termios(struct tty_struct *tty,
 		if (r < 0 && old_termios) {
 			priv->baud_rate = tty_termios_baud_rate(old_termios);
 			tty_termios_copy_hw(&tty->termios, old_termios);
+		} else if (r == 0) {
+			priv->lcr = ctrl;
 		}
 	}
 

From 55fa15b5987db22b4f35d3f0798928c126be5f1c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:16 +0100
Subject: [PATCH 135/297] USB: serial: ch341: fix baud rate and line-control
 handling

Revert to using direct register writes to set the divisor and
line-control registers.

A recent change switched to using the init vendor command to update
these registers, something which also enabled support for CH341A
devices. It turns out that simply setting bit 7 in the divisor register
is sufficient to support CH341A and specifically prevent data from being
buffered until a full endpoint-size packet (32 bytes) has been received.

Using the init command also had the side-effect of temporarily
deasserting the DTR/RTS signals on every termios change (including
initialisation on open) something which for example could cause problems
in setups where DTR is used to trigger a reset.

Fixes: 4e46c410e050 ("USB: serial: ch341: reinitialize chip on
reconfiguration")
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index eabdd05a2147..8d7b0847109b 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -133,8 +133,8 @@ static int ch341_control_in(struct usb_device *dev,
 	return r;
 }
 
-static int ch341_init_set_baudrate(struct usb_device *dev,
-				   struct ch341_private *priv, unsigned ctrl)
+static int ch341_set_baudrate_lcr(struct usb_device *dev,
+				  struct ch341_private *priv, u8 lcr)
 {
 	short a;
 	int r;
@@ -157,9 +157,19 @@ static int ch341_init_set_baudrate(struct usb_device *dev,
 	factor = 0x10000 - factor;
 	a = (factor & 0xff00) | divisor;
 
-	/* 0x9c is "enable SFR_UART Control register and timer" */
-	r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT,
-			      0x9c | (ctrl << 8), a | 0x80);
+	/*
+	 * CH341A buffers data until a full endpoint-size packet (32 bytes)
+	 * has been received unless bit 7 is set.
+	 */
+	a |= BIT(7);
+
+	r = ch341_control_out(dev, CH341_REQ_WRITE_REG, 0x1312, a);
+	if (r)
+		return r;
+
+	r = ch341_control_out(dev, CH341_REQ_WRITE_REG, 0x2518, lcr);
+	if (r)
+		return r;
 
 	return r;
 }
@@ -233,7 +243,7 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv)
 	if (r < 0)
 		goto out;
 
-	r = ch341_init_set_baudrate(dev, priv, priv->lcr);
+	r = ch341_set_baudrate_lcr(dev, priv, priv->lcr);
 	if (r < 0)
 		goto out;
 
@@ -394,7 +404,7 @@ static void ch341_set_termios(struct tty_struct *tty,
 	if (baud_rate) {
 		priv->baud_rate = baud_rate;
 
-		r = ch341_init_set_baudrate(port->serial->dev, priv, ctrl);
+		r = ch341_set_baudrate_lcr(port->serial->dev, priv, ctrl);
 		if (r < 0 && old_termios) {
 			priv->baud_rate = tty_termios_baud_rate(old_termios);
 			tty_termios_copy_hw(&tty->termios, old_termios);

From 811a919135b980bac8009d042acdccf10dc1ef5e Mon Sep 17 00:00:00 2001
From: Zefir Kurtisi <zefir.kurtisi@neratec.com>
Date: Fri, 6 Jan 2017 12:14:48 +0100
Subject: [PATCH 136/297] phy state machine: failsafe leave invalid RUNNING
 state

While in RUNNING state, phy_state_machine() checks for link changes by
comparing phydev->link before and after calling phy_read_status().
This works as long as it is guaranteed that phydev->link is never
changed outside the phy_state_machine().

If in some setups this happens, it causes the state machine to miss
a link loss and remain RUNNING despite phydev->link being 0.

This has been observed running a dsa setup with a process continuously
polling the link states over ethtool each second (SNMPD RFC-1213
agent). Disconnecting the link on a phy followed by a ETHTOOL_GSET
causes dsa_slave_get_settings() / dsa_slave_get_link_ksettings() to
call phy_read_status() and with that modify the link status - and
with that bricking the phy state machine.

This patch adds a fail-safe check while in RUNNING, which causes to
move to CHANGELINK when the link is gone and we are still RUNNING.

Signed-off-by: Zefir Kurtisi <zefir.kurtisi@neratec.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 25f93a98863b..48da6e93c3f7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1065,6 +1065,15 @@ void phy_state_machine(struct work_struct *work)
 			if (old_link != phydev->link)
 				phydev->state = PHY_CHANGELINK;
 		}
+		/*
+		 * Failsafe: check that nobody set phydev->link=0 between two
+		 * poll cycles, otherwise we won't leave RUNNING state as long
+		 * as link remains down.
+		 */
+		if (!phydev->link && phydev->state == PHY_RUNNING) {
+			phydev->state = PHY_CHANGELINK;
+			phydev_err(phydev, "no link in PHY_RUNNING\n");
+		}
 		break;
 	case PHY_CHANGELINK:
 		err = phy_read_status(phydev);

From 67c408cfa8862fe7e45b3a1f762f7140e03b7217 Mon Sep 17 00:00:00 2001
From: Alexander Alemayhu <alexander@alemayhu.com>
Date: Sat, 7 Jan 2017 23:53:00 +0100
Subject: [PATCH 137/297] ipv6: fix typos

o s/approriate/appropriate
o s/discouvery/discovery

Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8417c41d8ec8..ce5aaf448c54 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1464,7 +1464,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
-	 * check if the redirect has come from approriate router.
+	 * check if the redirect has come from appropriate router.
 	 *
 	 * RFC 4861 specifies that redirects should only be
 	 * accepted if they come from the nexthop to the target.
@@ -2768,7 +2768,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   old MTU is the lowest MTU in the path, update the route PMTU
 	   to reflect the increase. In this case if the other nodes' MTU
 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
-	   PMTU discouvery.
+	   PMTU discovery.
 	 */
 	if (rt->dst.dev == arg->dev &&
 	    dst_metric_raw(&rt->dst, RTAX_MTU) &&

From b007f09072ca8afa118ade333e717ba443e8d807 Mon Sep 17 00:00:00 2001
From: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Date: Mon, 9 Jan 2017 10:45:49 +0300
Subject: [PATCH 138/297] ipv4: make tcp_notsent_lowat sysctl knob behave as
 true unsigned int

> cat /proc/sys/net/ipv4/tcp_notsent_lowat
-1
> echo 4294967295 > /proc/sys/net/ipv4/tcp_notsent_lowat
-bash: echo: write error: Invalid argument
> echo -2147483648 > /proc/sys/net/ipv4/tcp_notsent_lowat
> cat /proc/sys/net/ipv4/tcp_notsent_lowat
-2147483648

but in documentation we have "tcp_notsent_lowat - UNSIGNED INTEGER"

v2: simplify to just proc_douintvec
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 22cbd61079b5..b2fa498b15d1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -951,7 +951,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.data		= &init_net.ipv4.sysctl_tcp_notsent_lowat,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_douintvec,
 	},
 	{
 		.procname	= "tcp_tw_reuse",

From ce7e40c432ba84da104438f6799d460a4cad41bc Mon Sep 17 00:00:00 2001
From: Vlad Tsyrklevich <vlad@tsyrklevich.net>
Date: Mon, 9 Jan 2017 20:57:48 +0700
Subject: [PATCH 139/297] net/appletalk: Fix kernel memory disclosure

ipddp_route structs contain alignment padding so kernel heap memory
is leaked when they are copied to user space in
ipddp_ioctl(SIOCFINDIPDDPRT). Change kmalloc() to kzalloc() to clear
that memory.

Signed-off-by: Vlad Tsyrklevich <vlad@tsyrklevich.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ipddp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index b8c293373ecc..a306de4318d7 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -190,7 +190,7 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
  */
 static int ipddp_create(struct ipddp_route *new_rt)
 {
-        struct ipddp_route *rt = kmalloc(sizeof(*rt), GFP_KERNEL);
+        struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL);
 
         if (rt == NULL)
                 return -ENOMEM;

From 2ebae8bd60188f57e26e95e7fde6b8943297d348 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 9 Jan 2017 15:17:27 +0100
Subject: [PATCH 140/297] net: phy: Add Meson GXL PHY hardware dependency

As I understand it the Meson GXL PHY driver is only useful on one
architecture so only make it visible on that architecture.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: 7334b3e47aee ("net: phy: Add Meson GXL Internal PHY driver")
Cc: Neil Armstrong <narmstrong@baylibre.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index d361835b315d..8dbd59baa34d 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -279,6 +279,7 @@ config MARVELL_PHY
 
 config MESON_GXL_PHY
 	tristate "Amlogic Meson GXL Internal PHY"
+	depends on ARCH_MESON || COMPILE_TEST
 	---help---
 	  Currently has a driver for the Amlogic Meson GXL Internal PHY
 

From 6bb629db5e7daa619f5242b6ad93e4dd9bf7432c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 9 Jan 2017 08:51:32 -0800
Subject: [PATCH 141/297] tcp: do not export tcp_peer_is_proven()

After commit 1fb6f159fd21 ("tcp: add tcp_conn_request"),
tcp_peer_is_proven() no longer needs to be exported.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_metrics.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d46f4d5b1c62..ba8f02d0f283 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -606,7 +606,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(tcp_peer_is_proven);
 
 void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
 {

From 5149fd327f16e393c1d04fa5325ab072c32472bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:36:30 -0800
Subject: [PATCH 142/297] xfs: bump up reserved blocks in xfs_alloc_set_aside

Setting aside 4 blocks globally for bmbt splits isn't all that useful,
as different threads can allocate space in parallel.  Bump it to 4
blocks per AG to allow each thread that is currently doing an
allocation to dip into it separately.  Without that we may no have
enough reserved blocks if there are enough parallel transactions
in an almost out space file system that all run into bmap btree
splits.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 5050056a0b06..0a46f8488b8d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -95,10 +95,7 @@ unsigned int
 xfs_alloc_set_aside(
 	struct xfs_mount	*mp)
 {
-	unsigned int		blocks;
-
-	blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
-	return blocks;
+	return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
 }
 
 /*

From 255c516278175a6dc7037d1406307f35237d8688 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:36:19 -0800
Subject: [PATCH 143/297] xfs: fix bogus minleft manipulations

We can't just set minleft to 0 when we're low on space - that's exactly
what we need minleft for: to protect space in the AG for btree block
allocations when we are low on free space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c      | 24 +++++++-----------------
 fs/xfs/libxfs/xfs_bmap.c       |  3 ---
 fs/xfs/libxfs/xfs_bmap_btree.c |  3 +--
 3 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0a46f8488b8d..fe925702c955 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2635,12 +2635,10 @@ xfs_alloc_vextent(
 	xfs_agblock_t	agsize;	/* allocation group size */
 	int		error;
 	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
-	xfs_extlen_t	minleft;/* minimum left value, temp copy */
 	xfs_mount_t	*mp;	/* mount structure pointer */
 	xfs_agnumber_t	sagno;	/* starting allocation group number */
 	xfs_alloctype_t	type;	/* input allocation type */
 	int		bump_rotor = 0;
-	int		no_min = 0;
 	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */
 
 	mp = args->mp;
@@ -2669,7 +2667,6 @@ xfs_alloc_vextent(
 		trace_xfs_alloc_vextent_badargs(args);
 		return 0;
 	}
-	minleft = args->minleft;
 
 	switch (type) {
 	case XFS_ALLOCTYPE_THIS_AG:
@@ -2680,9 +2677,7 @@ xfs_alloc_vextent(
 		 */
 		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
 		args->pag = xfs_perag_get(mp, args->agno);
-		args->minleft = 0;
 		error = xfs_alloc_fix_freelist(args, 0);
-		args->minleft = minleft;
 		if (error) {
 			trace_xfs_alloc_vextent_nofix(args);
 			goto error0;
@@ -2747,9 +2742,7 @@ xfs_alloc_vextent(
 		 */
 		for (;;) {
 			args->pag = xfs_perag_get(mp, args->agno);
-			if (no_min) args->minleft = 0;
 			error = xfs_alloc_fix_freelist(args, flags);
-			args->minleft = minleft;
 			if (error) {
 				trace_xfs_alloc_vextent_nofix(args);
 				goto error0;
@@ -2789,20 +2782,17 @@ xfs_alloc_vextent(
 			 * or switch to non-trylock mode.
 			 */
 			if (args->agno == sagno) {
-				if (no_min == 1) {
+				if (flags == 0) {
 					args->agbno = NULLAGBLOCK;
 					trace_xfs_alloc_vextent_allfailed(args);
 					break;
 				}
-				if (flags == 0) {
-					no_min = 1;
-				} else {
-					flags = 0;
-					if (type == XFS_ALLOCTYPE_START_BNO) {
-						args->agbno = XFS_FSB_TO_AGBNO(mp,
-							args->fsbno);
-						args->type = XFS_ALLOCTYPE_NEAR_BNO;
-					}
+
+				flags = 0;
+				if (type == XFS_ALLOCTYPE_START_BNO) {
+					args->agbno = XFS_FSB_TO_AGBNO(mp,
+						args->fsbno);
+					args->type = XFS_ALLOCTYPE_NEAR_BNO;
 				}
 			}
 			xfs_perag_put(args->pag);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2760bc3b2536..44773c9eb957 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3812,7 +3812,6 @@ xfs_bmap_btalloc(
 		args.fsbno = 0;
 		args.type = XFS_ALLOCTYPE_FIRST_AG;
 		args.total = ap->minlen;
-		args.minleft = 0;
 		if ((error = xfs_alloc_vextent(&args)))
 			return error;
 		ap->dfops->dop_low = true;
@@ -4344,8 +4343,6 @@ xfs_bmapi_allocate(
 	if (error)
 		return error;
 
-	if (bma->dfops->dop_low)
-		bma->minleft = 0;
 	if (bma->cur)
 		bma->cur->bc_private.b.firstblock = *bma->firstblock;
 	if (bma->blkno == NULLFSBLOCK)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index d6330c297ca0..d9be241fc86f 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -502,12 +502,11 @@ try_another_ag:
 	if (args.fsbno == NULLFSBLOCK && args.minleft) {
 		/*
 		 * Could not find an AG with enough free space to satisfy
-		 * a full btree split.  Try again without minleft and if
+		 * a full btree split.  Try again and if
 		 * successful activate the lowspace algorithm.
 		 */
 		args.fsbno = 0;
 		args.type = XFS_ALLOCTYPE_FIRST_AG;
-		args.minleft = 0;
 		error = xfs_alloc_vextent(&args);
 		if (error)
 			goto error0;

From 54fee133ad59c87ab01dd84ab3e9397134b32acb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:44:30 -0800
Subject: [PATCH 144/297] xfs: adjust allocation length in
 xfs_alloc_space_available

We must decide in xfs_alloc_fix_freelist if we can perform an
allocation from a given AG is possible or not based on the available
space, and should not fail the allocation past that point on a
healthy file system.

But currently we have two additional places that second-guess
xfs_alloc_fix_freelist: xfs_alloc_ag_vextent tries to adjust the
maxlen parameter to remove the reservation before doing the
allocation (but ignores the various minium freespace requirements),
and xfs_alloc_fix_minleft tries to fix up the allocated length
after we've found an extent, but ignores the reservations and also
doesn't take the AGFL into account (and thus fails allocations
for not matching minlen in some cases).

Remove all these later fixups and just correct the maxlen argument
inside xfs_alloc_fix_freelist once we have the AGF buffer locked.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 81 ++++++++-------------------------------
 fs/xfs/libxfs/xfs_alloc.h |  2 +-
 2 files changed, 18 insertions(+), 65 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index fe925702c955..f2e7eb6e5243 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -362,35 +362,11 @@ xfs_alloc_fix_len(
 		return;
 	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 	ASSERT(rlen % args->prod == args->mod);
+	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
+		rlen + args->minleft);
 	args->len = rlen;
 }
 
-/*
- * Fix up length if there is too little space left in the a.g.
- * Return 1 if ok, 0 if too little, should give up.
- */
-STATIC int
-xfs_alloc_fix_minleft(
-	xfs_alloc_arg_t	*args)		/* allocation argument structure */
-{
-	xfs_agf_t	*agf;		/* a.g. freelist header */
-	int		diff;		/* free space difference */
-
-	if (args->minleft == 0)
-		return 1;
-	agf = XFS_BUF_TO_AGF(args->agbp);
-	diff = be32_to_cpu(agf->agf_freeblks)
-		- args->len - args->minleft;
-	if (diff >= 0)
-		return 1;
-	args->len += diff;		/* shrink the allocated space */
-	/* casts to (int) catch length underflows */
-	if ((int)args->len >= (int)args->minlen)
-		return 1;
-	args->agbno = NULLAGBLOCK;
-	return 0;
-}
-
 /*
  * Update the two btrees, logically removing from freespace the extent
  * starting at rbno, rlen blocks.  The extent is contained within the
@@ -686,8 +662,6 @@ xfs_alloc_ag_vextent(
 	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
 {
 	int		error=0;
-	xfs_extlen_t	reservation;
-	xfs_extlen_t	oldmax;
 
 	ASSERT(args->minlen > 0);
 	ASSERT(args->maxlen > 0);
@@ -695,20 +669,6 @@ xfs_alloc_ag_vextent(
 	ASSERT(args->mod < args->prod);
 	ASSERT(args->alignment > 0);
 
-	/*
-	 * Clamp maxlen to the amount of free space minus any reservations
-	 * that have been made.
-	 */
-	oldmax = args->maxlen;
-	reservation = xfs_ag_resv_needed(args->pag, args->resv);
-	if (args->maxlen > args->pag->pagf_freeblks - reservation)
-		args->maxlen = args->pag->pagf_freeblks - reservation;
-	if (args->maxlen == 0) {
-		args->agbno = NULLAGBLOCK;
-		args->maxlen = oldmax;
-		return 0;
-	}
-
 	/*
 	 * Branch to correct routine based on the type.
 	 */
@@ -728,8 +688,6 @@ xfs_alloc_ag_vextent(
 		/* NOTREACHED */
 	}
 
-	args->maxlen = oldmax;
-
 	if (error || args->agbno == NULLAGBLOCK)
 		return error;
 
@@ -838,9 +796,6 @@ xfs_alloc_ag_vextent_exact(
 	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 						- args->agbno;
 	xfs_alloc_fix_len(args);
-	if (!xfs_alloc_fix_minleft(args))
-		goto not_found;
-
 	ASSERT(args->agbno + args->len <= tend);
 
 	/*
@@ -1146,12 +1101,7 @@ restart:
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 		args->len = blen;
-		if (!xfs_alloc_fix_minleft(args)) {
-			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-			trace_xfs_alloc_near_nominleft(args);
-			return 0;
-		}
-		blen = args->len;
+
 		/*
 		 * We are allocating starting at bnew for blen blocks.
 		 */
@@ -1343,12 +1293,6 @@ restart:
 	 */
 	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 	xfs_alloc_fix_len(args);
-	if (!xfs_alloc_fix_minleft(args)) {
-		trace_xfs_alloc_near_nominleft(args);
-		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
-		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-		return 0;
-	}
 	rlen = args->len;
 	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
 				     args->datatype, ltbnoa, ltlena, &ltnew);
@@ -1550,8 +1494,6 @@ restart:
 	}
 	xfs_alloc_fix_len(args);
 
-	if (!xfs_alloc_fix_minleft(args))
-		goto out_nominleft;
 	rlen = args->len;
 	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
 	/*
@@ -2070,10 +2012,20 @@ xfs_alloc_space_available(
 
 	/* do we have enough free space remaining for the allocation? */
 	available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
-			  reservation - min_free - args->total);
-	if (available < (int)args->minleft || available <= 0)
+			  reservation - min_free - args->minleft);
+	if (available < (int)args->total)
 		return false;
 
+	/*
+	 * Clamp maxlen to the amount of free space available for the actual
+	 * extent allocation.
+	 */
+	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
+		args->maxlen = available;
+		ASSERT(args->maxlen > 0);
+		ASSERT(args->maxlen >= args->minlen);
+	}
+
 	return true;
 }
 
@@ -2119,7 +2071,8 @@ xfs_alloc_fix_freelist(
 	}
 
 	need = xfs_alloc_min_freelist(mp, pag);
-	if (!xfs_alloc_space_available(args, need, flags))
+	if (!xfs_alloc_space_available(args, need, flags |
+			XFS_ALLOC_FLAG_CHECK))
 		goto out_agbp_relse;
 
 	/*
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 7c404a6b0ae3..1d0f48a501a3 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t;
 #define	XFS_ALLOC_FLAG_FREEING	0x00000002  /* indicate caller is freeing extents*/
 #define	XFS_ALLOC_FLAG_NORMAP	0x00000004  /* don't modify the rmapbt */
 #define	XFS_ALLOC_FLAG_NOSHRINK	0x00000008  /* don't shrink the freelist */
-
+#define	XFS_ALLOC_FLAG_CHECK	0x00000010  /* test only, don't modify args */
 
 /*
  * Argument structure for xfs_alloc routines.

From 12ef830198b0d71668eb9b59f9ba69d32951a48a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:39:35 -0800
Subject: [PATCH 145/297] xfs: don't rely on ->total in
 xfs_alloc_space_available

->total is a bit of an odd parameter passed down to the low-level
allocator all the way from the high-level callers.  It's supposed to
contain the maximum number of blocks to be allocated for the whole
transaction [1].

But in xfs_iomap_write_allocate we only convert existing delayed
allocations and thus only have a minimal block reservation for the
current transaction, so xfs_alloc_space_available can't use it for
the allocation decisions.  Use the maximum of args->total and the
calculated block requirement to make a decision.  We probably should
get rid of args->total eventually and instead apply ->minleft more
broadly, but that will require some extensive changes all over.

[1] which creates lots of confusion as most callers don't decrement it
once doing a first allocation.  But that's for a separate series.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index f2e7eb6e5243..9f06a211e157 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1995,7 +1995,7 @@ xfs_alloc_space_available(
 	int			flags)
 {
 	struct xfs_perag	*pag = args->pag;
-	xfs_extlen_t		longest;
+	xfs_extlen_t		alloc_len, longest;
 	xfs_extlen_t		reservation; /* blocks that are still reserved */
 	int			available;
 
@@ -2005,15 +2005,16 @@ xfs_alloc_space_available(
 	reservation = xfs_ag_resv_needed(pag, args->resv);
 
 	/* do we have enough contiguous free space for the allocation? */
+	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
 	longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
 			reservation);
-	if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
+	if (longest < alloc_len)
 		return false;
 
 	/* do we have enough free space remaining for the allocation? */
 	available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
 			  reservation - min_free - args->minleft);
-	if (available < (int)args->total)
+	if (available < (int)max(args->total, alloc_len))
 		return false;
 
 	/*

From 84a4620cfe97c9d57e39b2369bfb77faff55063d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 13:41:33 -0800
Subject: [PATCH 146/297] xfs: don't print warnings when xfs_log_force fails

There are only two reasons for xfs_log_force / xfs_log_force_lsn to fail:
one is an I/O error, for which xlog_bdstrat already logs a warning, and
the second is an already shutdown log due to a previous I/O errors.  In
the latter case we'll already have a previous indication for the actual
error, but the large stream of misleading warnings from xfs_log_force
will probably scroll it out of the message buffer.

Simply removing the warnings thus makes the XFS log reporting significantly
better.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c39ac14ff540..b1469f0a91a6 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3317,12 +3317,8 @@ xfs_log_force(
 	xfs_mount_t	*mp,
 	uint		flags)
 {
-	int	error;
-
 	trace_xfs_log_force(mp, 0, _RET_IP_);
-	error = _xfs_log_force(mp, flags, NULL);
-	if (error)
-		xfs_warn(mp, "%s: error %d returned.", __func__, error);
+	_xfs_log_force(mp, flags, NULL);
 }
 
 /*
@@ -3466,12 +3462,8 @@ xfs_log_force_lsn(
 	xfs_lsn_t	lsn,
 	uint		flags)
 {
-	int	error;
-
 	trace_xfs_log_force(mp, lsn, _RET_IP_);
-	error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
-	if (error)
-		xfs_warn(mp, "%s: error %d returned.", __func__, error);
+	_xfs_log_force_lsn(mp, lsn, flags, NULL);
 }
 
 /*

From 32cd7cbbacf700885a2316275f188f2d5739b5f4 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <Jes.Sorensen@redhat.com>
Date: Fri, 6 Jan 2017 19:31:35 -0500
Subject: [PATCH 147/297] md/raid5: Use correct IS_ERR() variation on pointer
 check

This fixes a build error on certain architectures, such as ppc64.

Fixes: 6995f0b247e("md: takeover should clear unrelated bits")
Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7b1da6e95a56..36c13e4be9c9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7831,7 +7831,7 @@ static void *raid5_takeover_raid1(struct mddev *mddev)
 	mddev->new_chunk_sectors = chunksect;
 
 	ret = setup_conf(mddev);
-	if (!IS_ERR_VALUE(ret))
+	if (!IS_ERR(ret))
 		mddev_clear_unsupported_flags(mddev,
 			UNSUPPORTED_MDDEV_FLAGS);
 	return ret;

From 5dedade6dfa243c130b85d1e4daba6f027805033 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 9 Jan 2017 12:41:43 +0100
Subject: [PATCH 148/297] x86/CPU: Add native CPUID variants returning a single
 datum

... similarly to the cpuid_<reg>() variants.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170109114147.5082-2-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/processor.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index eaf100508c36..1be64da0384e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -219,6 +219,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 	    : "memory");
 }
 
+#define native_cpuid_reg(reg)					\
+static inline unsigned int native_cpuid_##reg(unsigned int op)	\
+{								\
+	unsigned int eax = op, ebx, ecx = 0, edx;		\
+								\
+	native_cpuid(&eax, &ebx, &ecx, &edx);			\
+								\
+	return reg;						\
+}
+
+/*
+ * Native CPUID functions returning a single datum.
+ */
+native_cpuid_reg(eax)
+native_cpuid_reg(ebx)
+native_cpuid_reg(ecx)
+native_cpuid_reg(edx)
+
 static inline void load_cr3(pgd_t *pgdir)
 {
 	write_cr3(__pa(pgdir));

From f3e2a51f568d9f33370f4e8bb05669a34223241a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 9 Jan 2017 12:41:44 +0100
Subject: [PATCH 149/297] x86/microcode: Use native CPUID to tickle out
 microcode revision

Intel supplies the microcode revision value in MSR 0x8b
(IA32_BIOS_SIGN_ID) after CPUID(1) has been executed. Execute it each
time before reading that MSR.

It used to do sync_core() which did do CPUID but

  c198b121b1a1 ("x86/asm: Rewrite sync_core() to use IRET-to-self")

changed the sync_core() implementation so we better make the microcode
loading case explicit, as the SDM documents it.

Reported-and-tested-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170109114147.5082-3-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel.c           |  2 +-
 arch/x86/kernel/cpu/microcode/intel.c | 26 +++-----------------------
 2 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcd484d2bb03..2d49aa949fa1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -83,7 +83,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 
 		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 		/* Required by the SDM */
-		sync_core();
+		native_cpuid_eax(1);
 		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
 	}
 
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index b624b54912e1..f79249fab389 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -368,26 +368,6 @@ next:
 	return patch;
 }
 
-static void cpuid_1(void)
-{
-	/*
-	 * According to the Intel SDM, Volume 3, 9.11.7:
-	 *
-	 *   CPUID returns a value in a model specific register in
-	 *   addition to its usual register return values. The
-	 *   semantics of CPUID cause it to deposit an update ID value
-	 *   in the 64-bit model-specific register at address 08BH
-	 *   (IA32_BIOS_SIGN_ID). If no update is present in the
-	 *   processor, the value in the MSR remains unmodified.
-	 *
-	 * Use native_cpuid -- this code runs very early and we don't
-	 * want to mess with paravirt.
-	 */
-	unsigned int eax = 1, ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-}
-
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
@@ -413,7 +393,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
+	native_cpuid_eax(1);
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -613,7 +593,7 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
+	native_cpuid_eax(1);
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -825,7 +805,7 @@ static int apply_microcode_intel(int cpu)
 	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
+	native_cpuid_eax(1);
 
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);

From 4167709bbf826512a52ebd6aafda2be104adaec9 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 9 Jan 2017 12:41:45 +0100
Subject: [PATCH 150/297] x86/microcode/intel: Add a helper which gives the
 microcode revision

Since on Intel we're required to do CPUID(1) first, before reading
the microcode revision MSR, let's add a special helper which does the
required steps so that we don't forget to do them next time, when we
want to read the microcode revision.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170109114147.5082-4-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/microcode_intel.h | 15 +++++++++
 arch/x86/kernel/cpu/intel.c            | 11 ++-----
 arch/x86/kernel/cpu/microcode/intel.c  | 43 ++++++++------------------
 3 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 195becc6f780..e793fc9a9b20 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -52,6 +52,21 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
+static inline u32 intel_get_microcode_revision(void)
+{
+	u32 rev, dummy;
+
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	native_cpuid_eax(1);
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
+
+	return rev;
+}
+
 #ifdef CONFIG_MICROCODE_INTEL
 extern void __init load_ucode_intel_bsp(void);
 extern void load_ucode_intel_ap(void);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2d49aa949fa1..203f860d2ab3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -14,6 +14,7 @@
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/intel-family.h>
+#include <asm/microcode_intel.h>
 
 #ifdef CONFIG_X86_64
 #include <linux/topology.h>
@@ -78,14 +79,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
-	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
-		unsigned lower_word;
-
-		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-		/* Required by the SDM */
-		native_cpuid_eax(1);
-		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
-	}
+	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
+		c->microcode = intel_get_microcode_revision();
 
 	/*
 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f79249fab389..faec8fa68ffd 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -390,15 +390,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
 	}
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	native_cpuid_eax(1);
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	csig.rev = val[1];
+	csig.rev = intel_get_microcode_revision();
 
 	uci->cpu_sig = csig;
 	uci->valid = 1;
@@ -582,7 +575,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci)
 static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 {
 	struct microcode_intel *mc;
-	unsigned int val[2];
+	u32 rev;
 
 	mc = uci->mc;
 	if (!mc)
@@ -590,21 +583,16 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 
 	/* write microcode via MSR 0x79 */
 	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	native_cpuid_eax(1);
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc->hdr.rev)
+	rev = intel_get_microcode_revision();
+	if (rev != mc->hdr.rev)
 		return -1;
 
 #ifdef CONFIG_X86_64
 	/* Flush global tlb. This is precaution. */
 	flush_tlb_early();
 #endif
-	uci->cpu_sig.rev = val[1];
+	uci->cpu_sig.rev = rev;
 
 	if (early)
 		print_ucode(uci);
@@ -784,8 +772,8 @@ static int apply_microcode_intel(int cpu)
 	struct microcode_intel *mc;
 	struct ucode_cpu_info *uci;
 	struct cpuinfo_x86 *c;
-	unsigned int val[2];
 	static int prev_rev;
+	u32 rev;
 
 	/* We should bind the task to the CPU */
 	if (WARN_ON(raw_smp_processor_id() != cpu))
@@ -802,33 +790,28 @@ static int apply_microcode_intel(int cpu)
 
 	/* write microcode via MSR 0x79 */
 	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	native_cpuid_eax(1);
+	rev = intel_get_microcode_revision();
 
-	/* get the current revision from MSR 0x8B */
-	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	if (val[1] != mc->hdr.rev) {
+	if (rev != mc->hdr.rev) {
 		pr_err("CPU%d update to revision 0x%x failed\n",
 		       cpu, mc->hdr.rev);
 		return -1;
 	}
 
-	if (val[1] != prev_rev) {
+	if (rev != prev_rev) {
 		pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
-			val[1],
+			rev,
 			mc->hdr.date & 0xffff,
 			mc->hdr.date >> 24,
 			(mc->hdr.date >> 16) & 0xff);
-		prev_rev = val[1];
+		prev_rev = rev;
 	}
 
 	c = &cpu_data(cpu);
 
-	uci->cpu_sig.rev = val[1];
-	c->microcode = val[1];
+	uci->cpu_sig.rev = rev;
+	c->microcode = rev;
 
 	return 0;
 }

From 9fcf5ba2ef908af916e9002891fbbca20ce4dc98 Mon Sep 17 00:00:00 2001
From: Junichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 9 Jan 2017 12:41:46 +0100
Subject: [PATCH 151/297] x86/microcode/intel: Fix allocation size of struct
 ucode_patch

We allocate struct ucode_patch here. @size is the size of microcode data
and used for kmemdup() later in this function.

Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading")
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/7a730dc9-ac17-35c4-fe76-dfc94e5ecd95@ce.jp.nec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/microcode/intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index faec8fa68ffd..943486589757 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -150,7 +150,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
 {
 	struct ucode_patch *p;
 
-	p = kzalloc(size, GFP_KERNEL);
+	p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 

From 2e86222c67bb5d942da68e8415749b32db208534 Mon Sep 17 00:00:00 2001
From: Junichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 9 Jan 2017 12:41:47 +0100
Subject: [PATCH 152/297] x86/microcode/intel: Use correct buffer size for
 saving microcode data

In generic_load_microcode(), curr_mc_size is the size of the last
allocated buffer and since we have this performance "optimization"
there to vmalloc a new buffer only when the current one is bigger,
curr_mc_size ends up becoming the size of the biggest buffer we've seen
so far.

However, we end up saving the microcode patch which matches our CPU
and its size is not curr_mc_size but the respective mc_size during the
iteration while we're staring at it.

So save that mc_size into a separate variable and use it to store the
previously found microcode buffer.

Without this fix, we could get oops like this:

  BUG: unable to handle kernel paging request at ffffc9000e30f000
  IP: __memcpy+0x12/0x20
  ...
  Call Trace:
  ? kmemdup+0x43/0x60
  __alloc_microcode_buf+0x44/0x70
  save_microcode_patch+0xd4/0x150
  generic_load_microcode+0x1b8/0x260
  request_microcode_user+0x15/0x20
  microcode_write+0x91/0x100
  __vfs_write+0x34/0x120
  vfs_write+0xc1/0x130
  SyS_write+0x56/0xc0
  do_syscall_64+0x6c/0x160
  entry_SYSCALL64_slow_path+0x25/0x25

Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading")
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/4f33cbfd-44f2-9bed-3b66-7446cd14256f@ce.jp.nec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/microcode/intel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 943486589757..3f329b74e040 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -823,7 +823,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover = size;
-	unsigned int curr_mc_size = 0;
+	unsigned int curr_mc_size = 0, new_mc_size = 0;
 	unsigned int csig, cpf;
 
 	while (leftover) {
@@ -864,6 +864,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
+			new_mc_size = mc_size;
 			mc = NULL;	/* trigger new vmalloc */
 		}
 
@@ -889,7 +890,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	 * permanent memory. So it will be loaded early when a CPU is hot added
 	 * or resumes.
 	 */
-	save_mc_for_early(new_mc, curr_mc_size);
+	save_mc_for_early(new_mc, new_mc_size);
 
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);

From 3895dbf8985f656675b5bde610723a29cbce3fa7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Jan 2017 14:18:43 +1300
Subject: [PATCH 153/297] mnt: Protect the mountpoint hashtable with mount_lock

Protecting the mountpoint hashtable with namespace_sem was sufficient
until a call to umount_mnt was added to mntput_no_expire.  At which
point it became possible for multiple calls of put_mountpoint on
the same hash chain to happen on the same time.

Kristen Johansen <kjlx@templeofstupid.com> reported:
> This can cause a panic when simultaneous callers of put_mountpoint
> attempt to free the same mountpoint.  This occurs because some callers
> hold the mount_hash_lock, while others hold the namespace lock.  Some
> even hold both.
>
> In this submitter's case, the panic manifested itself as a GP fault in
> put_mountpoint() when it called hlist_del() and attempted to dereference
> a m_hash.pprev that had been poisioned by another thread.

Al Viro observed that the simple fix is to switch from using the namespace_sem
to the mount_lock to protect the mountpoint hash table.

I have taken Al's suggested patch moved put_mountpoint in pivot_root
(instead of taking mount_lock an additional time), and have replaced
new_mountpoint with get_mountpoint a function that does the hash table
lookup and addition under the mount_lock.   The introduction of get_mounptoint
ensures that only the mount_lock is needed to manipulate the mountpoint
hashtable.

d_set_mounted is modified to only set DCACHE_MOUNTED if it is not
already set.  This allows get_mountpoint to use the setting of
DCACHE_MOUNTED to ensure adding a struct mountpoint for a dentry
happens exactly once.

Cc: stable@vger.kernel.org
Fixes: ce07d891a089 ("mnt: Honor MNT_LOCKED when detaching mounts")
Reported-by: Krister Johansen <kjlx@templeofstupid.com>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/dcache.c    |  7 ++++--
 fs/namespace.c | 68 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 769903dbc19d..95d71eda8142 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1336,8 +1336,11 @@ int d_set_mounted(struct dentry *dentry)
 	}
 	spin_lock(&dentry->d_lock);
 	if (!d_unlinked(dentry)) {
-		dentry->d_flags |= DCACHE_MOUNTED;
-		ret = 0;
+		ret = -EBUSY;
+		if (!d_mountpoint(dentry)) {
+			dentry->d_flags |= DCACHE_MOUNTED;
+			ret = 0;
+		}
 	}
  	spin_unlock(&dentry->d_lock);
 out:
diff --git a/fs/namespace.c b/fs/namespace.c
index b5b1259e064f..487ba30bb5c6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -742,26 +742,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 	return NULL;
 }
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
 {
-	struct hlist_head *chain = mp_hash(dentry);
-	struct mountpoint *mp;
+	struct mountpoint *mp, *new = NULL;
 	int ret;
 
-	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-	if (!mp)
-		return ERR_PTR(-ENOMEM);
-
-	ret = d_set_mounted(dentry);
-	if (ret) {
-		kfree(mp);
-		return ERR_PTR(ret);
+	if (d_mountpoint(dentry)) {
+mountpoint:
+		read_seqlock_excl(&mount_lock);
+		mp = lookup_mountpoint(dentry);
+		read_sequnlock_excl(&mount_lock);
+		if (mp)
+			goto done;
 	}
 
-	mp->m_dentry = dentry;
-	mp->m_count = 1;
-	hlist_add_head(&mp->m_hash, chain);
-	INIT_HLIST_HEAD(&mp->m_list);
+	if (!new)
+		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+
+
+	/* Exactly one processes may set d_mounted */
+	ret = d_set_mounted(dentry);
+
+	/* Someone else set d_mounted? */
+	if (ret == -EBUSY)
+		goto mountpoint;
+
+	/* The dentry is not available as a mountpoint? */
+	mp = ERR_PTR(ret);
+	if (ret)
+		goto done;
+
+	/* Add the new mountpoint to the hash table */
+	read_seqlock_excl(&mount_lock);
+	new->m_dentry = dentry;
+	new->m_count = 1;
+	hlist_add_head(&new->m_hash, mp_hash(dentry));
+	INIT_HLIST_HEAD(&new->m_list);
+	read_sequnlock_excl(&mount_lock);
+
+	mp = new;
+	new = NULL;
+done:
+	kfree(new);
 	return mp;
 }
 
@@ -1595,11 +1619,11 @@ void __detach_mounts(struct dentry *dentry)
 	struct mount *mnt;
 
 	namespace_lock();
+	lock_mount_hash();
 	mp = lookup_mountpoint(dentry);
 	if (IS_ERR_OR_NULL(mp))
 		goto out_unlock;
 
-	lock_mount_hash();
 	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
@@ -1609,9 +1633,9 @@ void __detach_mounts(struct dentry *dentry)
 		}
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
-	unlock_mount_hash();
 	put_mountpoint(mp);
 out_unlock:
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -2038,9 +2062,7 @@ retry:
 	namespace_lock();
 	mnt = lookup_mnt(path);
 	if (likely(!mnt)) {
-		struct mountpoint *mp = lookup_mountpoint(dentry);
-		if (!mp)
-			mp = new_mountpoint(dentry);
+		struct mountpoint *mp = get_mountpoint(dentry);
 		if (IS_ERR(mp)) {
 			namespace_unlock();
 			inode_unlock(dentry->d_inode);
@@ -2059,7 +2081,11 @@ retry:
 static void unlock_mount(struct mountpoint *where)
 {
 	struct dentry *dentry = where->m_dentry;
+
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(where);
+	read_sequnlock_excl(&mount_lock);
+
 	namespace_unlock();
 	inode_unlock(dentry->d_inode);
 }
@@ -3135,9 +3161,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	/* A moved mount should not expire automatically */
 	list_del_init(&new_mnt->mnt_expire);
+	put_mountpoint(root_mp);
 	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
-	put_mountpoint(root_mp);
 	error = 0;
 out4:
 	unlock_mount(old_mp);

From 75422726b0f717d67db3283c2eb5bc14fa2619c5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 4 Jan 2017 17:37:27 +1300
Subject: [PATCH 154/297] libfs: Modify mount_pseudo_xattr to be clear it is
 not a userspace mount

Add MS_KERNMOUNT to the flags that are passed.
Use sget_userns and force &init_user_ns instead of calling sget so that
even if called from a weird context the internal filesystem will be
considered to be in the intial user namespace.

Luis Ressel reported that the the failure to pass MS_KERNMOUNT into
mount_pseudo broke his in development graphics driver that uses the
generic drm infrastructure.  I am not certain the deriver was bug
free in it's usage of that infrastructure but since
mount_pseudo_xattr can never be triggered by userspace it is clearer
and less error prone, and less problematic for the code to be explicit.

Reported-by: Luis Ressel <aranea@aixah.de>
Tested-by: Luis Ressel <aranea@aixah.de>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/libfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index e973cd51f126..28d6f35feed6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
 	struct inode *root;
 	struct qstr d_name = QSTR_INIT(name, strlen(name));
 
-	s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
+	s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER,
+			&init_user_ns, NULL);
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 

From add7c65ca426b7a37184dd3d2172394e23d585d6 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@openvz.org>
Date: Wed, 4 Jan 2017 19:28:14 -0800
Subject: [PATCH 155/297] pid: fix lockdep deadlock warning due to ucount_lock

=========================================================
[ INFO: possible irq lock inversion dependency detected ]
4.10.0-rc2-00024-g4aecec9-dirty #118 Tainted: G        W
---------------------------------------------------------
swapper/1/0 just changed the state of lock:
 (&(&sighand->siglock)->rlock){-.....}, at: [<ffffffffbd0a1bc6>] __lock_task_sighand+0xb6/0x2c0
but this lock took another, HARDIRQ-unsafe lock in the past:
 (ucounts_lock){+.+...}
and interrupts could create inverse lock ordering between them.
other info that might help us debug this:
Chain exists of:                 &(&sighand->siglock)->rlock --> &(&tty->ctrl_lock)->rlock --> ucounts_lock
 Possible interrupt unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(ucounts_lock);
                               local_irq_disable();
                               lock(&(&sighand->siglock)->rlock);
                               lock(&(&tty->ctrl_lock)->rlock);
  <Interrupt>
    lock(&(&sighand->siglock)->rlock);

 *** DEADLOCK ***

This patch removes a dependency between rlock and ucount_lock.

Fixes: f333c700c610 ("pidns: Add a limit on the number of pid namespaces")
Cc: stable@vger.kernel.org
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/pid_namespace.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index df9e8e9e0be7..eef2ce968636 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -151,8 +151,12 @@ out:
 
 static void delayed_free_pidns(struct rcu_head *p)
 {
-	kmem_cache_free(pid_ns_cachep,
-			container_of(p, struct pid_namespace, rcu));
+	struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu);
+
+	dec_pid_namespaces(ns->ucounts);
+	put_user_ns(ns->user_ns);
+
+	kmem_cache_free(pid_ns_cachep, ns);
 }
 
 static void destroy_pid_namespace(struct pid_namespace *ns)
@@ -162,8 +166,6 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	ns_free_inum(&ns->ns);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
-	dec_pid_namespaces(ns->ucounts);
-	put_user_ns(ns->user_ns);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 }
 

From 93362fa47fe98b62e4a34ab408c4a418432e7939 Mon Sep 17 00:00:00 2001
From: Zhou Chengming <zhouchengming1@huawei.com>
Date: Fri, 6 Jan 2017 09:32:32 +0800
Subject: [PATCH 156/297] sysctl: Drop reference added by grab_header in
 proc_sys_readdir

Fixes CVE-2016-9191, proc_sys_readdir doesn't drop reference
added by grab_header when return from !dir_emit_dots path.
It can cause any path called unregister_sysctl_table will
wait forever.

The calltrace of CVE-2016-9191:

[ 5535.960522] Call Trace:
[ 5535.963265]  [<ffffffff817cdaaf>] schedule+0x3f/0xa0
[ 5535.968817]  [<ffffffff817d33fb>] schedule_timeout+0x3db/0x6f0
[ 5535.975346]  [<ffffffff817cf055>] ? wait_for_completion+0x45/0x130
[ 5535.982256]  [<ffffffff817cf0d3>] wait_for_completion+0xc3/0x130
[ 5535.988972]  [<ffffffff810d1fd0>] ? wake_up_q+0x80/0x80
[ 5535.994804]  [<ffffffff8130de64>] drop_sysctl_table+0xc4/0xe0
[ 5536.001227]  [<ffffffff8130de17>] drop_sysctl_table+0x77/0xe0
[ 5536.007648]  [<ffffffff8130decd>] unregister_sysctl_table+0x4d/0xa0
[ 5536.014654]  [<ffffffff8130deff>] unregister_sysctl_table+0x7f/0xa0
[ 5536.021657]  [<ffffffff810f57f5>] unregister_sched_domain_sysctl+0x15/0x40
[ 5536.029344]  [<ffffffff810d7704>] partition_sched_domains+0x44/0x450
[ 5536.036447]  [<ffffffff817d0761>] ? __mutex_unlock_slowpath+0x111/0x1f0
[ 5536.043844]  [<ffffffff81167684>] rebuild_sched_domains_locked+0x64/0xb0
[ 5536.051336]  [<ffffffff8116789d>] update_flag+0x11d/0x210
[ 5536.057373]  [<ffffffff817cf61f>] ? mutex_lock_nested+0x2df/0x450
[ 5536.064186]  [<ffffffff81167acb>] ? cpuset_css_offline+0x1b/0x60
[ 5536.070899]  [<ffffffff810fce3d>] ? trace_hardirqs_on+0xd/0x10
[ 5536.077420]  [<ffffffff817cf61f>] ? mutex_lock_nested+0x2df/0x450
[ 5536.084234]  [<ffffffff8115a9f5>] ? css_killed_work_fn+0x25/0x220
[ 5536.091049]  [<ffffffff81167ae5>] cpuset_css_offline+0x35/0x60
[ 5536.097571]  [<ffffffff8115aa2c>] css_killed_work_fn+0x5c/0x220
[ 5536.104207]  [<ffffffff810bc83f>] process_one_work+0x1df/0x710
[ 5536.110736]  [<ffffffff810bc7c0>] ? process_one_work+0x160/0x710
[ 5536.117461]  [<ffffffff810bce9b>] worker_thread+0x12b/0x4a0
[ 5536.123697]  [<ffffffff810bcd70>] ? process_one_work+0x710/0x710
[ 5536.130426]  [<ffffffff810c3f7e>] kthread+0xfe/0x120
[ 5536.135991]  [<ffffffff817d4baf>] ret_from_fork+0x1f/0x40
[ 5536.142041]  [<ffffffff810c3e80>] ? kthread_create_on_node+0x230/0x230

One cgroup maintainer mentioned that "cgroup is trying to offline
a cpuset css, which takes place under cgroup_mutex.  The offlining
ends up trying to drain active usages of a sysctl table which apprently
is not happening."
The real reason is that proc_sys_readdir doesn't drop reference added
by grab_header when return from !dir_emit_dots path. So this cpuset
offline path will wait here forever.

See here for details: http://www.openwall.com/lists/oss-security/2016/11/04/13

Fixes: f0c3b5093add ("[readdir] convert procfs")
Cc: stable@vger.kernel.org
Reported-by: CAI Qian <caiqian@redhat.com>
Tested-by: Yang Shukui <yangshukui@huawei.com>
Signed-off-by: Zhou Chengming <zhouchengming1@huawei.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/proc_sysctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 55313d994895..d4e37acd4821 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 	ctl_dir = container_of(head, struct ctl_dir, header);
 
 	if (!dir_emit_dots(file, ctx))
-		return 0;
+		goto out;
 
 	pos = 2;
 
@@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 			break;
 		}
 	}
+out:
 	sysctl_head_finish(head);
 	return 0;
 }

From 21d25f6a4217e755906cb548b55ddab39d0e88b9 Mon Sep 17 00:00:00 2001
From: Krister Johansen <kjlx@templeofstupid.com>
Date: Wed, 4 Jan 2017 01:22:52 -0800
Subject: [PATCH 157/297] dmaengine: iota: ioat_alloc_chan_resources should not
 perform sleeping allocations.

On a kernel with DEBUG_LOCKS, ioat_free_chan_resources triggers an
in_interrupt() warning.  With PROVE_LOCKING, it reports detecting a
SOFTIRQ-safe to SOFTIRQ-unsafe lock ordering in the same code path.

This is because dma_generic_alloc_coherent() checks if the GFP flags
permit blocking.  It allocates from different subsystems if blocking is
permitted.  The free path knows how to return the memory to the correct
allocator.  If GFP_KERNEL is specified then the alloc and free end up
going through cma_alloc(), which uses mutexes.

Given that ioat_free_chan_resources() can be called in interrupt
context, ioat_alloc_chan_resources() must specify GFP_NOWAIT so that the
allocations do not block and instead use an allocator that uses
spinlocks.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ioat/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index ace5cb2cb12f..cc5259b881d4 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -700,7 +700,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
 	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
 	ioat_chan->completion =
 		dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool,
-				GFP_KERNEL, &ioat_chan->completion_dma);
+				GFP_NOWAIT, &ioat_chan->completion_dma);
 	if (!ioat_chan->completion)
 		return -ENOMEM;
 
@@ -710,7 +710,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
 	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
 
 	order = IOAT_MAX_ORDER;
-	ring = ioat_alloc_ring(c, order, GFP_KERNEL);
+	ring = ioat_alloc_ring(c, order, GFP_NOWAIT);
 	if (!ring)
 		return -ENOMEM;
 

From 527a27591312e4b3a0f8179f321f9e85c0850df0 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 9 Jan 2017 16:50:52 +0200
Subject: [PATCH 158/297] dmaengine: omap-dma: Fix the port_window support

We do not yet have users of port_window. The following errors were found
when converting the tusb6010_omap.c musb driver:

- The peripheral side must have SRC_/DST_PACKED disabled
- when configuring the burst for the peripheral side the memory side
  configuration were overwritten: d->csdp = ... -> d->csdp |= ...
- The EI and FI were configured for the wrong sides of the transfers.

With these changes and the converted tus6010_omap.c I was able to verify
that things are working as they expected to work.

Fixes: 201ac4861c19 ("dmaengine: omap-dma: Support for slave devices with data port window")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/omap-dma.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 4ad101a47e0a..daf479cce691 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -938,6 +938,23 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 		d->ccr |= CCR_DST_AMODE_POSTINC;
 		if (port_window) {
 			d->ccr |= CCR_SRC_AMODE_DBLIDX;
+
+			if (port_window_bytes >= 64)
+				d->csdp |= CSDP_SRC_BURST_64;
+			else if (port_window_bytes >= 32)
+				d->csdp |= CSDP_SRC_BURST_32;
+			else if (port_window_bytes >= 16)
+				d->csdp |= CSDP_SRC_BURST_16;
+
+		} else {
+			d->ccr |= CCR_SRC_AMODE_CONSTANT;
+		}
+	} else {
+		d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
+
+		d->ccr |= CCR_SRC_AMODE_POSTINC;
+		if (port_window) {
+			d->ccr |= CCR_DST_AMODE_DBLIDX;
 			d->ei = 1;
 			/*
 			 * One frame covers the port_window and by  configure
@@ -948,27 +965,11 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 			d->fi = -(port_window_bytes - 1);
 
 			if (port_window_bytes >= 64)
-				d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
+				d->csdp |= CSDP_DST_BURST_64;
 			else if (port_window_bytes >= 32)
-				d->csdp = CSDP_SRC_BURST_32 | CSDP_SRC_PACKED;
+				d->csdp |= CSDP_DST_BURST_32;
 			else if (port_window_bytes >= 16)
-				d->csdp = CSDP_SRC_BURST_16 | CSDP_SRC_PACKED;
-		} else {
-			d->ccr |= CCR_SRC_AMODE_CONSTANT;
-		}
-	} else {
-		d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
-
-		d->ccr |= CCR_SRC_AMODE_POSTINC;
-		if (port_window) {
-			d->ccr |= CCR_DST_AMODE_DBLIDX;
-
-			if (port_window_bytes >= 64)
-				d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED;
-			else if (port_window_bytes >= 32)
-				d->csdp = CSDP_DST_BURST_32 | CSDP_DST_PACKED;
-			else if (port_window_bytes >= 16)
-				d->csdp = CSDP_DST_BURST_16 | CSDP_DST_PACKED;
+				d->csdp |= CSDP_DST_BURST_16;
 		} else {
 			d->ccr |= CCR_DST_AMODE_CONSTANT;
 		}
@@ -1017,7 +1018,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 		osg->addr = sg_dma_address(sgent);
 		osg->en = en;
 		osg->fn = sg_dma_len(sgent) / frame_bytes;
-		if (port_window && dir == DMA_MEM_TO_DEV) {
+		if (port_window && dir == DMA_DEV_TO_MEM) {
 			osg->ei = 1;
 			/*
 			 * One frame covers the port_window and by  configure

From 497de07d89c1410d76a15bec2bb41f24a2a89f31 Mon Sep 17 00:00:00 2001
From: Gu Zheng <guzheng1@huawei.com>
Date: Mon, 9 Jan 2017 09:34:48 +0800
Subject: [PATCH 159/297] tmpfs: clear S_ISGID when setting posix ACLs

This change was missed the tmpfs modification in In CVE-2016-7097
commit 073931017b49 ("posix_acl: Clear SGID bit when setting
file permissions")
It can test by xfstest generic/375, which failed to clear
setgid bit in the following test case on tmpfs:

  touch $testfile
  chown 100:100 $testfile
  chmod 2755 $testfile
  _runas -u 100 -g 101 -- setfacl -m u::rwx,g::rwx,o::rwx $testfile

Signed-off-by: Gu Zheng <guzheng1@huawei.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/posix_acl.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 595522022aca..c9d48dc78495 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -922,11 +922,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	int error;
 
 	if (type == ACL_TYPE_ACCESS) {
-		error = posix_acl_equiv_mode(acl, &inode->i_mode);
-		if (error < 0)
-			return 0;
-		if (error == 0)
-			acl = NULL;
+		error = posix_acl_update_mode(inode,
+				&inode->i_mode, &acl);
+		if (error)
+			return error;
 	}
 
 	inode->i_ctime = current_time(inode);

From 2e40795c3bf344cfb5220d94566205796e3ef19a Mon Sep 17 00:00:00 2001
From: Dennis Kadioglu <denk@post.com>
Date: Mon, 9 Jan 2017 17:10:46 +0100
Subject: [PATCH 160/297] ALSA: usb-audio: Add a quirk for Plantronics BT600

Plantronics BT600 does not support reading the sample rate which leads
to many lines of "cannot get freq at ep 0x1" and "cannot get freq at
ep 0x82". This patch adds the USB ID of the BT600 to quirks.c and
avoids those error messages.

Signed-off-by: Dennis Kadioglu <denk@post.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index b3fd2382fdd9..eb4b9f7a571e 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1135,6 +1135,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
 	case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
 	case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
+	case USB_ID(0x047F, 0x02F7): /* Plantronics BT-600 */
 	case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */

From 146cc8a17a3b4996f6805ee5c080e7101277c410 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 10 Jan 2017 12:05:37 +0100
Subject: [PATCH 161/297] USB: serial: kl5kusb105: fix line-state error
 handling

The current implementation failed to detect short transfers when
attempting to read the line state, and also, to make things worse,
logged the content of the uninitialised heap transfer buffer.

Fixes: abf492e7b3ae ("USB: kl5kusb105: fix DMA buffers on stack")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/kl5kusb105.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index 0ee190fc1bf8..6cb45757818f 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -192,10 +192,11 @@ static int klsi_105_get_line_state(struct usb_serial_port *port,
 			     status_buf, KLSI_STATUSBUF_LEN,
 			     10000
 			     );
-	if (rc < 0)
-		dev_err(&port->dev, "Reading line status failed (error = %d)\n",
-			rc);
-	else {
+	if (rc != KLSI_STATUSBUF_LEN) {
+		dev_err(&port->dev, "reading line status failed: %d\n", rc);
+		if (rc >= 0)
+			rc = -EIO;
+	} else {
 		status = get_unaligned_le16(status_buf);
 
 		dev_info(&port->serial->dev->dev, "read status %x %x\n",

From 620f1a632ebcc9811c2f8009ba52297c7006f805 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 13 Dec 2016 18:50:13 -0800
Subject: [PATCH 162/297] wusbcore: Fix one more crypto-on-the-stack bug

The driver put a constant buffer of all zeros on the stack and
pointed a scatterlist entry at it.  This doesn't work with virtual
stacks.  Use ZERO_PAGE instead.

Cc: stable@vger.kernel.org # 4.9 only
Reported-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/wusbcore/crypto.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index 79451f7ef1b7..062c205f0046 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -216,7 +216,6 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
 	struct scatterlist sg[4], sg_dst;
 	void *dst_buf;
 	size_t dst_size;
-	const u8 bzero[16] = { 0 };
 	u8 iv[crypto_skcipher_ivsize(tfm_cbc)];
 	size_t zero_padding;
 
@@ -261,7 +260,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
 	sg_set_buf(&sg[1], &scratch->b1, sizeof(scratch->b1));
 	sg_set_buf(&sg[2], b, blen);
 	/* 0 if well behaved :) */
-	sg_set_buf(&sg[3], bzero, zero_padding);
+	sg_set_page(&sg[3], ZERO_PAGE(0), zero_padding, 0);
 	sg_init_one(&sg_dst, dst_buf, dst_size);
 
 	skcipher_request_set_tfm(req, tfm_cbc);

From dc647ec88e029307e60e6bf9988056605f11051a Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Tue, 10 Jan 2017 09:30:51 +0100
Subject: [PATCH 163/297] net: socket: Make unnecessarily global
 sockfs_setattr() static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sockfs_setattr() static as it is not used outside of net/socket.c

This fixes the following GCC warning:
net/socket.c:534:5: warning: no previous prototype for ‘sockfs_setattr’ [-Wmissing-prototypes]

Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.")
Cc: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index a8c2307590b8..0758e13754e2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -533,7 +533,7 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
 	return used;
 }
 
-int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
+static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	int err = simple_setattr(dentry, iattr);
 

From e864212078ded276bdb272b2e0ee6a979357ca8a Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:53 +0100
Subject: [PATCH 164/297] target: add XCOPY target/segment desc sense codes

As defined in http://www.t10.org/lists/asc-num.htm. To be used during
validation of XCOPY target and segment descriptor lists.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_transport.c | 24 ++++++++++++++++++++++++
 include/target/target_core_base.h      |  4 ++++
 2 files changed, 28 insertions(+)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 7dfefd66df93..1cadc9eefa21 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1693,6 +1693,10 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
 	case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
 	case TCM_COPY_TARGET_DEVICE_NOT_REACHABLE:
+	case TCM_TOO_MANY_TARGET_DESCS:
+	case TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE:
+	case TCM_TOO_MANY_SEGMENT_DESCS:
+	case TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE:
 		break;
 	case TCM_OUT_OF_RESOURCES:
 		sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -2808,6 +2812,26 @@ static const struct sense_info sense_info_table[] = {
 		.key = ILLEGAL_REQUEST,
 		.asc = 0x26, /* INVALID FIELD IN PARAMETER LIST */
 	},
+	[TCM_TOO_MANY_TARGET_DESCS] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x06, /* TOO MANY TARGET DESCRIPTORS */
+	},
+	[TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x07, /* UNSUPPORTED TARGET DESCRIPTOR TYPE CODE */
+	},
+	[TCM_TOO_MANY_SEGMENT_DESCS] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x08, /* TOO MANY SEGMENT DESCRIPTORS */
+	},
+	[TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE] = {
+		.key = ILLEGAL_REQUEST,
+		.asc = 0x26,
+		.ascq = 0x09, /* UNSUPPORTED SEGMENT DESCRIPTOR TYPE CODE */
+	},
 	[TCM_PARAMETER_LIST_LENGTH_ERROR] = {
 		.key = ILLEGAL_REQUEST,
 		.asc = 0x1a, /* PARAMETER LIST LENGTH ERROR */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 29e6858bb164..43edf82e54ff 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -174,6 +174,10 @@ enum tcm_sense_reason_table {
 	TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED	= R(0x16),
 	TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED	= R(0x17),
 	TCM_COPY_TARGET_DEVICE_NOT_REACHABLE	= R(0x18),
+	TCM_TOO_MANY_TARGET_DESCS		= R(0x19),
+	TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE	= R(0x1a),
+	TCM_TOO_MANY_SEGMENT_DESCS		= R(0x1b),
+	TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE	= R(0x1c),
 #undef R
 };
 

From 61c359194c46cbffec9a1f2c59c1c4011222ad84 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:54 +0100
Subject: [PATCH 165/297] target: use XCOPY TOO MANY TARGET DESCRIPTORS sense

spc4r37 6.4.3.4 states:
  If the number of CSCD descriptors exceeds the allowed number, the copy
  manager shall terminate the command with CHECK CONDITION status, with
  the sense key set to ILLEGAL REQUEST, and the additional sense code
  set to TOO MANY TARGET DESCRIPTORS.

LIO currently responds with INVALID FIELD IN PARAMETER LIST, which sees
it fail the libiscsi ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 37d5caebffa6..db265ad10fa4 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -201,9 +201,11 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 			" multiple of %d\n", XCOPY_TARGET_DESC_LEN);
 		return -EINVAL;
 	}
-	if (tdll > 64) {
+	if (tdll > RCR_OP_MAX_TARGET_DESC_COUNT * XCOPY_TARGET_DESC_LEN) {
 		pr_err("XCOPY target descriptor supports a maximum"
 			" two src/dest descriptors, tdll: %hu too large..\n", tdll);
+		/* spc4r37 6.4.3.4 CSCD DESCRIPTOR LIST LENGTH field */
+		*sense_ret = TCM_TOO_MANY_TARGET_DESCS;
 		return -EINVAL;
 	}
 	/*

From af9f62c1686268c0517b289274d38f3a03bebd2a Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:55 +0100
Subject: [PATCH 166/297] target: bounds check XCOPY segment descriptor list

Check the length of the XCOPY request segment descriptor list against
the value advertised via the MAXIMUM SEGMENT DESCRIPTOR COUNT field in
the RECEIVE COPY OPERATING PARAMETERS response.

spc4r37 6.4.3.5 states:
  If the number of segment descriptors exceeds the allowed number, the
  copy manager shall terminate the command with CHECK CONDITION status,
  with the sense key set to ILLEGAL REQUEST, and the additional sense
  code set to TOO MANY SEGMENT DESCRIPTORS.

This functionality is testable using the libiscsi
ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index db265ad10fa4..da0f2da732e7 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -308,17 +308,26 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op
 
 static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
 				struct xcopy_op *xop, unsigned char *p,
-				unsigned int sdll)
+				unsigned int sdll, sense_reason_t *sense_ret)
 {
 	unsigned char *desc = p;
 	unsigned int start = 0;
 	int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0;
 
+	*sense_ret = TCM_INVALID_PARAMETER_LIST;
+
 	if (offset != 0) {
 		pr_err("XCOPY segment descriptor list length is not"
 			" multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
 		return -EINVAL;
 	}
+	if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) {
+		pr_err("XCOPY supports %u segment descriptor(s), sdll: %u too"
+			" large..\n", RCR_OP_MAX_SG_DESC_COUNT, sdll);
+		/* spc4r37 6.4.3.5 SEGMENT DESCRIPTOR LIST LENGTH field */
+		*sense_ret = TCM_TOO_MANY_SEGMENT_DESCS;
+		return -EINVAL;
+	}
 
 	while (start < sdll) {
 		/*
@@ -916,7 +925,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 	seg_desc = &p[16];
 	seg_desc += (rc * XCOPY_TARGET_DESC_LEN);
 
-	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, sdll);
+	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc,
+						    sdll, &ret);
 	if (rc <= 0) {
 		xcopy_pt_undepend_remotedev(xop);
 		goto out;

From 7d38706669ce00603b187f667a4eb67c94eac098 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:56 +0100
Subject: [PATCH 167/297] target: bounds check XCOPY total descriptor list
 length

spc4r37 6.4.3.5 states:
  If the combined length of the CSCD descriptors and segment descriptors
  exceeds the allowed value, then the copy manager shall terminate the
  command with CHECK CONDITION status, with the sense key set to ILLEGAL
  REQUEST, and the additional sense code set to PARAMETER LIST LENGTH
  ERROR.

This functionality can be tested using the libiscsi
ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index da0f2da732e7..0d10fcf438d1 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -894,6 +894,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 	 */
 	tdll = get_unaligned_be16(&p[2]);
 	sdll = get_unaligned_be32(&p[8]);
+	if (tdll + sdll > RCR_OP_MAX_DESC_LIST_LEN) {
+		pr_err("XCOPY descriptor list length %u exceeds maximum %u\n",
+		       tdll + sdll, RCR_OP_MAX_DESC_LIST_LEN);
+		ret = TCM_PARAMETER_LIST_LENGTH_ERROR;
+		goto out;
+	}
 
 	inline_dl = get_unaligned_be32(&p[12]);
 	if (inline_dl != 0) {

From c243849720ac237e9e7191fe57f619bb3a871d4c Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:04 +0100
Subject: [PATCH 168/297] target: return UNSUPPORTED TARGET/SEGMENT DESC TYPE
 CODE sense

Use UNSUPPORTED TARGET DESCRIPTOR TYPE CODE and UNSUPPORTED SEGMENT
DESCRIPTOR TYPE CODE additional sense codes if a descriptor type in an
XCOPY request is not supported, as specified in spc4r37 6.4.5 and 6.4.6.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 0d10fcf438d1..52738a108e5f 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -199,6 +199,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 	if (offset != 0) {
 		pr_err("XCOPY target descriptor list length is not"
 			" multiple of %d\n", XCOPY_TARGET_DESC_LEN);
+		*sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE;
 		return -EINVAL;
 	}
 	if (tdll > RCR_OP_MAX_TARGET_DESC_COUNT * XCOPY_TARGET_DESC_LEN) {
@@ -240,6 +241,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 		default:
 			pr_err("XCOPY unsupported descriptor type code:"
 					" 0x%02x\n", desc[0]);
+			*sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE;
 			goto out;
 		}
 	}
@@ -319,6 +321,7 @@ static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
 	if (offset != 0) {
 		pr_err("XCOPY segment descriptor list length is not"
 			" multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
+		*sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE;
 		return -EINVAL;
 	}
 	if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) {
@@ -346,6 +349,7 @@ static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
 		default:
 			pr_err("XCOPY unsupported segment descriptor"
 				"type: 0x%02x\n", desc[0]);
+			*sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE;
 			goto out;
 		}
 	}

From 94aae4caacda89a1bdb7198b260f4ca3595b7ed7 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:05 +0100
Subject: [PATCH 169/297] target: simplify XCOPY wwn->se_dev lookup helper

target_xcopy_locate_se_dev_e4() is used to locate an se_dev, based on
the WWN provided with the XCOPY request. Remove a couple of unneeded
arguments, and rely on the caller for the src/dst test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 52738a108e5f..155db18ee4e7 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -53,18 +53,13 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 	return 0;
 }
 
-static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
-					bool src)
+static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
+					struct se_device **found_dev)
 {
 	struct se_device *se_dev;
-	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
+	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
 	int rc;
 
-	if (src)
-		dev_wwn = &xop->dst_tid_wwn[0];
-	else
-		dev_wwn = &xop->src_tid_wwn[0];
-
 	mutex_lock(&g_device_mutex);
 	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
 
@@ -78,15 +73,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
 		if (rc != 0)
 			continue;
 
-		if (src) {
-			xop->dst_dev = se_dev;
-			pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located"
-				" se_dev\n", xop->dst_dev);
-		} else {
-			xop->src_dev = se_dev;
-			pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from located"
-				" se_dev\n", xop->src_dev);
-		}
+		*found_dev = se_dev;
+		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
 
 		rc = target_depend_item(&se_dev->dev_group.cg_item);
 		if (rc != 0) {
@@ -247,9 +235,11 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 	}
 
 	if (xop->op_origin == XCOL_SOURCE_RECV_OP)
-		rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true);
+		rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn,
+						&xop->dst_dev);
 	else
-		rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false);
+		rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn,
+						&xop->src_dev);
 	/*
 	 * If a matching IEEE NAA 0x83 descriptor for the requested device
 	 * is not located on this node, return COPY_ABORTED with ASQ/ASQC

From f184210bca6c9d0091ff5e5629dea4cbb8a17c0f Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:06 +0100
Subject: [PATCH 170/297] target: check XCOPY segment descriptor CSCD IDs

Ensure that the segment descriptor CSCD descriptor ID values correspond
to CSCD descriptor entries located in the XCOPY command parameter list.
SPC4r37 6.4.6.1 Table 150 specifies this range as 0000h to 07FFh, where
the CSCD descriptor location in the parameter list can be located via:
16 + (id * 32)

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[ bvanassche: inserted "; " in the format string of an error message
  and also moved a "||" operator from the start of a line to the end
  of the previous line ]
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 8 ++++++++
 drivers/target/target_core_xcopy.h | 6 ++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 155db18ee4e7..41a2a8ad1046 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -278,6 +278,14 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op
 
 	xop->stdi = get_unaligned_be16(&desc[4]);
 	xop->dtdi = get_unaligned_be16(&desc[6]);
+
+	if (xop->stdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX ||
+	    xop->dtdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX) {
+		pr_err("XCOPY segment desc 0x02: unsupported CSCD ID > 0x%x; stdi: %hu dtdi: %hu\n",
+			XCOPY_CSCD_DESC_ID_LIST_OFF_MAX, xop->stdi, xop->dtdi);
+		return -EINVAL;
+	}
+
 	pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: %d\n",
 		desc_len, xop->stdi, xop->dtdi, dc);
 
diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h
index 4d3d4dd060f2..e2d141140342 100644
--- a/drivers/target/target_core_xcopy.h
+++ b/drivers/target/target_core_xcopy.h
@@ -5,6 +5,12 @@
 #define XCOPY_NAA_IEEE_REGEX_LEN	16
 #define XCOPY_MAX_SECTORS		1024
 
+/*
+ * SPC4r37 6.4.6.1
+ * Table 150 — CSCD descriptor ID values
+ */
+#define XCOPY_CSCD_DESC_ID_LIST_OFF_MAX	0x07FF
+
 enum xcopy_origin_list {
 	XCOL_SOURCE_RECV_OP = 0x01,
 	XCOL_DEST_RECV_OP = 0x02,

From 66640d35c1e4ef3c96ba5edb3c5e2ff8ab812e7a Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:07 +0100
Subject: [PATCH 171/297] target: use XCOPY segment descriptor CSCD IDs

The XCOPY specification in SPC4r37 states that the XCOPY source and
destination device(s) should be derived from the copy source and copy
destination (CSCD) descriptor IDs in the XCOPY segment descriptor.

The CSCD IDs are generally (for block -> block copies), indexes into
the corresponding CSCD descriptor list, e.g.
=================================
EXTENDED COPY Header
=================================
CSCD Descriptor List
- entry 0
  + LU ID <--------------<------------------\
- entry 1                                   |
  + LU ID <______________<_____________     |
=================================      |    |
Segment Descriptor List                |    |
- segment 0                            |    |
  + src CSCD ID = 0 --------->---------+----/
  + dest CSCD ID = 1 ___________>______|
  + len
  + src lba
  + dest lba
=================================

Currently LIO completely ignores the src and dest CSCD IDs in the
Segment Descriptor List, and instead assumes that the first entry in the
CSCD list corresponds to the source, and the second to the destination.

This commit removes this assumption, by ensuring that the Segment
Descriptor List is parsed prior to processing the CSCD Descriptor List.
CSCD Descriptor List processing is modified to compare the current list
index with the previously obtained src and dest CSCD IDs.

Additionally, XCOPY requests where the src and dest CSCD IDs refer to
the CSCD Descriptor List entry can now be successfully processed.

Fixes: cbf031f ("target: Add support for EXTENDED_COPY copy offload")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=191381
Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 79 ++++++++++++++++++------------
 1 file changed, 48 insertions(+), 31 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 41a2a8ad1046..2595c1eb9e91 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -98,7 +98,7 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
 }
 
 static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
-				unsigned char *p, bool src)
+				unsigned char *p, unsigned short cscd_index)
 {
 	unsigned char *desc = p;
 	unsigned short ript;
@@ -143,7 +143,13 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op
 		return -EINVAL;
 	}
 
-	if (src) {
+	if (cscd_index != xop->stdi && cscd_index != xop->dtdi) {
+		pr_debug("XCOPY 0xe4: ignoring CSCD entry %d - neither src nor "
+			 "dest\n", cscd_index);
+		return 0;
+	}
+
+	if (cscd_index == xop->stdi) {
 		memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
 		/*
 		 * Determine if the source designator matches the local device
@@ -155,10 +161,15 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op
 			pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source"
 					" received xop\n", xop->src_dev);
 		}
-	} else {
+	}
+
+	if (cscd_index == xop->dtdi) {
 		memcpy(&xop->dst_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
 		/*
-		 * Determine if the destination designator matches the local device
+		 * Determine if the destination designator matches the local
+		 * device. If @cscd_index corresponds to both source (stdi) and
+		 * destination (dtdi), or dtdi comes after stdi, then
+		 * XCOL_DEST_RECV_OP wins.
 		 */
 		if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0],
 				XCOPY_NAA_IEEE_REGEX_LEN)) {
@@ -178,9 +189,9 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 {
 	struct se_device *local_dev = se_cmd->se_dev;
 	unsigned char *desc = p;
-	int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0;
+	int offset = tdll % XCOPY_TARGET_DESC_LEN, rc;
+	unsigned short cscd_index = 0;
 	unsigned short start = 0;
-	bool src = true;
 
 	*sense_ret = TCM_INVALID_PARAMETER_LIST;
 
@@ -206,25 +217,19 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 
 	while (start < tdll) {
 		/*
-		 * Check target descriptor identification with 0xE4 type with
-		 * use VPD 0x83 WWPN matching ..
+		 * Check target descriptor identification with 0xE4 type, and
+		 * compare the current index with the CSCD descriptor IDs in
+		 * the segment descriptor. Use VPD 0x83 WWPN matching ..
 		 */
 		switch (desc[0]) {
 		case 0xe4:
 			rc = target_xcopy_parse_tiddesc_e4(se_cmd, xop,
-							&desc[0], src);
+							&desc[0], cscd_index);
 			if (rc != 0)
 				goto out;
-			/*
-			 * Assume target descriptors are in source -> destination order..
-			 */
-			if (src)
-				src = false;
-			else
-				src = true;
 			start += XCOPY_TARGET_DESC_LEN;
 			desc += XCOPY_TARGET_DESC_LEN;
-			ret++;
+			cscd_index++;
 			break;
 		default:
 			pr_err("XCOPY unsupported descriptor type code:"
@@ -234,12 +239,21 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 		}
 	}
 
-	if (xop->op_origin == XCOL_SOURCE_RECV_OP)
+	switch (xop->op_origin) {
+	case XCOL_SOURCE_RECV_OP:
 		rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn,
 						&xop->dst_dev);
-	else
+		break;
+	case XCOL_DEST_RECV_OP:
 		rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn,
 						&xop->src_dev);
+		break;
+	default:
+		pr_err("XCOPY CSCD descriptor IDs not found in CSCD list - "
+			"stdi: %hu dtdi: %hu\n", xop->stdi, xop->dtdi);
+		rc = -EINVAL;
+		break;
+	}
 	/*
 	 * If a matching IEEE NAA 0x83 descriptor for the requested device
 	 * is not located on this node, return COPY_ABORTED with ASQ/ASQC
@@ -256,7 +270,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
 	pr_debug("XCOPY TGT desc: Dest dev: %p NAA IEEE WWN: 0x%16phN\n",
 		 xop->dst_dev, &xop->dst_tid_wwn[0]);
 
-	return ret;
+	return cscd_index;
 
 out:
 	return -EINVAL;
@@ -913,6 +927,20 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		" tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage,
 		tdll, sdll, inline_dl);
 
+	/*
+	 * skip over the target descriptors until segment descriptors
+	 * have been passed - CSCD ids are needed to determine src and dest.
+	 */
+	seg_desc = &p[16] + tdll;
+
+	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc,
+						    sdll, &ret);
+	if (rc <= 0)
+		goto out;
+
+	pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc,
+				rc * XCOPY_SEGMENT_DESC_LEN);
+
 	rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll, &ret);
 	if (rc <= 0)
 		goto out;
@@ -930,19 +958,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 
 	pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc,
 				rc * XCOPY_TARGET_DESC_LEN);
-	seg_desc = &p[16];
-	seg_desc += (rc * XCOPY_TARGET_DESC_LEN);
-
-	rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc,
-						    sdll, &ret);
-	if (rc <= 0) {
-		xcopy_pt_undepend_remotedev(xop);
-		goto out;
-	}
 	transport_kunmap_data_sg(se_cmd);
 
-	pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc,
-				rc * XCOPY_SEGMENT_DESC_LEN);
 	INIT_WORK(&xop->xop_work, target_xcopy_do_work);
 	queue_work(xcopy_wq, &xop->xop_work);
 	return TCM_NO_SENSE;

From f94fd098f674b78c29f482da1999d8de0c93c74e Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:08 +0100
Subject: [PATCH 172/297] target: check for XCOPY parameter truncation

Check for XCOPY header, CSCD descriptor and segment descriptor list
truncation, and respond accordingly.

SPC4r37 6.4.1 EXTENDED COPY(LID4) states (also applying to LID1 reqs):
  If the parameter list length causes truncation of the parameter list,
  then the copy manager shall transfer no data and shall terminate the
  EXTENDED COPY command with CHECK CONDITION status, with the sense key
  set to ILLEGAL REQUEST, and the additional sense code set to PARAMETER
  LIST LENGTH ERROR.

This behaviour can be tested using the libiscsi ExtendedCopy.ParamHdr
test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 14 ++++++++++++++
 drivers/target/target_core_xcopy.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 2595c1eb9e91..a9a6462c66d1 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -888,6 +888,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		return TCM_UNSUPPORTED_SCSI_OPCODE;
 	}
 
+	if (se_cmd->data_length < XCOPY_HDR_LEN) {
+		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
+				se_cmd->data_length, XCOPY_HDR_LEN);
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
+	}
+
 	xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
 	if (!xop) {
 		pr_err("Unable to allocate xcopy_op\n");
@@ -923,6 +929,14 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		goto out;
 	}
 
+	if (se_cmd->data_length < (XCOPY_HDR_LEN + tdll + sdll + inline_dl)) {
+		pr_err("XCOPY parameter truncation: data length %u too small "
+			"for tdll: %hu sdll: %u inline_dl: %u\n",
+			se_cmd->data_length, tdll, sdll, inline_dl);
+		ret = TCM_PARAMETER_LIST_LENGTH_ERROR;
+		goto out;
+	}
+
 	pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x"
 		" tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage,
 		tdll, sdll, inline_dl);
diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h
index e2d141140342..7c0b105cbe1b 100644
--- a/drivers/target/target_core_xcopy.h
+++ b/drivers/target/target_core_xcopy.h
@@ -1,5 +1,6 @@
 #include <target/target_core_base.h>
 
+#define XCOPY_HDR_LEN			16
 #define XCOPY_TARGET_DESC_LEN		32
 #define XCOPY_SEGMENT_DESC_LEN		28
 #define XCOPY_NAA_IEEE_REGEX_LEN	16

From 87156518da94a696f2b27ab8945d531af2f1d339 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Mon, 2 Jan 2017 18:04:09 +0100
Subject: [PATCH 173/297] target: support XCOPY requests without parameters

SPC4r37 6.4.1 EXTENDED COPY(LID4) states (also applying to LID1 reqs):
  A parameter list length of zero specifies that the copy manager shall
  not transfer any data or alter any internal state, and this shall not
  be considered an error.

This behaviour can be tested using the libiscsi ExtendedCopy.ParamHdr
test.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/target/target_core_xcopy.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index a9a6462c66d1..d828b3b5000b 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -888,6 +888,10 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 		return TCM_UNSUPPORTED_SCSI_OPCODE;
 	}
 
+	if (se_cmd->data_length == 0) {
+		target_complete_cmd(se_cmd, SAM_STAT_GOOD);
+		return TCM_NO_SENSE;
+	}
 	if (se_cmd->data_length < XCOPY_HDR_LEN) {
 		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
 				se_cmd->data_length, XCOPY_HDR_LEN);

From 8fb280616878b81c0790a0c33acbeec59c5711f4 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Tue, 10 Jan 2017 17:04:06 +0800
Subject: [PATCH 174/297] r8152: split rtl8152_suspend function

Split rtl8152_suspend() into rtl8152_system_suspend() and
rtl8152_rumtime_suspend().

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 57 +++++++++++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 7dc61228c55b..c5e6d88de4e4 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3576,39 +3576,62 @@ static bool delay_autosuspend(struct r8152 *tp)
 		return false;
 }
 
-static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
+static int rtl8152_rumtime_suspend(struct r8152 *tp)
 {
-	struct r8152 *tp = usb_get_intfdata(intf);
 	struct net_device *netdev = tp->netdev;
 	int ret = 0;
 
-	mutex_lock(&tp->control);
-
-	if (PMSG_IS_AUTO(message)) {
-		if (netif_running(netdev) && delay_autosuspend(tp)) {
+	if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
+		if (delay_autosuspend(tp)) {
 			ret = -EBUSY;
 			goto out1;
 		}
 
-		set_bit(SELECTIVE_SUSPEND, &tp->flags);
-	} else {
-		netif_device_detach(netdev);
+		clear_bit(WORK_ENABLE, &tp->flags);
+		usb_kill_urb(tp->intr_urb);
+		napi_disable(&tp->napi);
+		rtl_stop_rx(tp);
+		tp->rtl_ops.autosuspend_en(tp, true);
+		napi_enable(&tp->napi);
 	}
 
+	set_bit(SELECTIVE_SUSPEND, &tp->flags);
+
+out1:
+	return ret;
+}
+
+static int rtl8152_system_suspend(struct r8152 *tp)
+{
+	struct net_device *netdev = tp->netdev;
+	int ret = 0;
+
+	netif_device_detach(netdev);
+
 	if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
 		clear_bit(WORK_ENABLE, &tp->flags);
 		usb_kill_urb(tp->intr_urb);
 		napi_disable(&tp->napi);
-		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
-			rtl_stop_rx(tp);
-			tp->rtl_ops.autosuspend_en(tp, true);
-		} else {
-			cancel_delayed_work_sync(&tp->schedule);
-			tp->rtl_ops.down(tp);
-		}
+		cancel_delayed_work_sync(&tp->schedule);
+		tp->rtl_ops.down(tp);
 		napi_enable(&tp->napi);
 	}
-out1:
+
+	return ret;
+}
+
+static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	struct r8152 *tp = usb_get_intfdata(intf);
+	int ret;
+
+	mutex_lock(&tp->control);
+
+	if (PMSG_IS_AUTO(message))
+		ret = rtl8152_rumtime_suspend(tp);
+	else
+		ret = rtl8152_system_suspend(tp);
+
 	mutex_unlock(&tp->control);
 
 	return ret;

From 75dc692eda114cb234a46cb11893a9c3ea520934 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Tue, 10 Jan 2017 17:04:07 +0800
Subject: [PATCH 175/297] r8152: fix rx issue for runtime suspend

Pause the rx and make sure the rx fifo is empty when the autosuspend
occurs.

If the rx data comes when the driver is canceling the rx urb, the host
controller would stop getting the data from the device and continue
it after next rx urb is submitted. That is, one continuing data is
split into two different urb buffers. That let the driver take the
data as a rx descriptor, and unexpected behavior happens.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index c5e6d88de4e4..be418563cb18 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3582,17 +3582,42 @@ static int rtl8152_rumtime_suspend(struct r8152 *tp)
 	int ret = 0;
 
 	if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
+		u32 rcr = 0;
+
 		if (delay_autosuspend(tp)) {
 			ret = -EBUSY;
 			goto out1;
 		}
 
+		if (netif_carrier_ok(netdev)) {
+			u32 ocp_data;
+
+			rcr = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
+			ocp_data = rcr & ~RCR_ACPT_ALL;
+			ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
+			rxdy_gated_en(tp, true);
+			ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA,
+						 PLA_OOB_CTRL);
+			if (!(ocp_data & RXFIFO_EMPTY)) {
+				rxdy_gated_en(tp, false);
+				ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr);
+				ret = -EBUSY;
+				goto out1;
+			}
+		}
+
 		clear_bit(WORK_ENABLE, &tp->flags);
 		usb_kill_urb(tp->intr_urb);
-		napi_disable(&tp->napi);
-		rtl_stop_rx(tp);
+
 		tp->rtl_ops.autosuspend_en(tp, true);
-		napi_enable(&tp->napi);
+
+		if (netif_carrier_ok(netdev)) {
+			napi_disable(&tp->napi);
+			rtl_stop_rx(tp);
+			rxdy_gated_en(tp, false);
+			ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr);
+			napi_enable(&tp->napi);
+		}
 	}
 
 	set_bit(SELECTIVE_SUSPEND, &tp->flags);

From 7b6c1b4c0e1e44544aa18161dba6a741c080a7ef Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Tue, 10 Jan 2017 10:46:00 -0600
Subject: [PATCH 176/297] usb: musb: fix runtime PM in debugfs

MUSB driver now has runtime PM support, but the debugfs driver misses
the PM _get/_put() calls, which could cause MUSB register access
failure.

Cc: stable@vger.kernel.org # 4.9+
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_debugfs.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c
index 4fef50e5c8c1..dd70c88419d2 100644
--- a/drivers/usb/musb/musb_debugfs.c
+++ b/drivers/usb/musb/musb_debugfs.c
@@ -114,6 +114,7 @@ static int musb_regdump_show(struct seq_file *s, void *unused)
 	unsigned		i;
 
 	seq_printf(s, "MUSB (M)HDRC Register Dump\n");
+	pm_runtime_get_sync(musb->controller);
 
 	for (i = 0; i < ARRAY_SIZE(musb_regmap); i++) {
 		switch (musb_regmap[i].size) {
@@ -132,6 +133,8 @@ static int musb_regdump_show(struct seq_file *s, void *unused)
 		}
 	}
 
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 	return 0;
 }
 
@@ -145,7 +148,10 @@ static int musb_test_mode_show(struct seq_file *s, void *unused)
 	struct musb		*musb = s->private;
 	unsigned		test;
 
+	pm_runtime_get_sync(musb->controller);
 	test = musb_readb(musb->mregs, MUSB_TESTMODE);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	if (test & MUSB_TEST_FORCE_HOST)
 		seq_printf(s, "force host\n");
@@ -194,11 +200,12 @@ static ssize_t musb_test_mode_write(struct file *file,
 	u8			test;
 	char			buf[18];
 
+	pm_runtime_get_sync(musb->controller);
 	test = musb_readb(musb->mregs, MUSB_TESTMODE);
 	if (test) {
 		dev_err(musb->controller, "Error: test mode is already set. "
 			"Please do USB Bus Reset to start a new test.\n");
-		return count;
+		goto ret;
 	}
 
 	memset(buf, 0x00, sizeof(buf));
@@ -234,6 +241,9 @@ static ssize_t musb_test_mode_write(struct file *file,
 
 	musb_writeb(musb->mregs, MUSB_TESTMODE, test);
 
+ret:
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 	return count;
 }
 
@@ -254,8 +264,13 @@ static int musb_softconnect_show(struct seq_file *s, void *unused)
 	switch (musb->xceiv->otg->state) {
 	case OTG_STATE_A_HOST:
 	case OTG_STATE_A_WAIT_BCON:
+		pm_runtime_get_sync(musb->controller);
+
 		reg = musb_readb(musb->mregs, MUSB_DEVCTL);
 		connect = reg & MUSB_DEVCTL_SESSION ? 1 : 0;
+
+		pm_runtime_mark_last_busy(musb->controller);
+		pm_runtime_put_autosuspend(musb->controller);
 		break;
 	default:
 		connect = -1;
@@ -284,6 +299,7 @@ static ssize_t musb_softconnect_write(struct file *file,
 	if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
 		return -EFAULT;
 
+	pm_runtime_get_sync(musb->controller);
 	if (!strncmp(buf, "0", 1)) {
 		switch (musb->xceiv->otg->state) {
 		case OTG_STATE_A_HOST:
@@ -314,6 +330,8 @@ static ssize_t musb_softconnect_write(struct file *file,
 		}
 	}
 
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 	return count;
 }
 

From 7c9d8d0c41b3e24473ac7648a7fc2d644ccf08ff Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bgly@us.ibm.com>
Date: Mon, 9 Jan 2017 10:21:20 -0600
Subject: [PATCH 177/297] ibmvscsis: Fix srp_transfer_data fail return code

If srp_transfer_data fails within ibmvscsis_write_pending, then
the most likely scenario is that the client timed out the op and
removed the TCE mapping. Thus it will loop forever retrying the
op that is pretty much guaranteed to fail forever. A better return
code would be EIO instead of EAGAIN.

Cc: stable@vger.kernel.org
Reported-by: Steven Royer <seroyer@linux.vnet.ibm.com>
Tested-by: Steven Royer <seroyer@linux.vnet.ibm.com>
Signed-off-by: Bryant G. Ly <bgly@us.ibm.com>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 3d3768aaab4f..8fb5c54c7dd3 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3585,7 +3585,7 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
 			       1, 1);
 	if (rc) {
 		pr_err("srp_transfer_data() failed: %d\n", rc);
-		return -EAGAIN;
+		return -EIO;
 	}
 	/*
 	 * We now tell TCM to add this WRITE CDB directly into the TCM storage

From d9584d8ccc06ba98f4fad8ec720de66b6659fd35 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 9 Jan 2017 11:18:01 -0800
Subject: [PATCH 178/297] net: skb_flow_get_be16() can be static

Removes following sparse complain :

net/core/flow_dissector.c:70:8: warning: symbol 'skb_flow_get_be16'
was not declared. Should it be static?

Fixes: 972d3876faa8 ("flow dissector: ICMP support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index fe4e1531976c..1b7673aac59d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -67,8 +67,8 @@ EXPORT_SYMBOL(skb_flow_dissector_init);
  * The function will try to retrieve a be32 entity at
  * offset poff
  */
-__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data,
-			 int hlen)
+static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
+				void *data, int hlen)
 {
 	__be16 *u, _u;
 

From dd545b52a3e1efd9f2c6352dbe95ccd0c53461cc Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Tue, 10 Jan 2017 13:29:54 -0700
Subject: [PATCH 179/297] do_direct_IO: Use inode->i_blkbits to compute block
 count to be cleaned

The code currently uses sdio->blkbits to compute the number of blocks to
be cleaned. However sdio->blkbits is derived from the logical block size
of the underlying block device (Refer to the definition of
do_blockdev_direct_IO()). Due to this, generic/299 test would rarely
fail when executed on an ext4 filesystem with 64k as the block size and
when using a virtio based disk (having 512 byte as the logical block
size) inside a kvm guest.

This commit fixes the bug by using inode->i_blkbits to compute the
number of blocks to be cleaned.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>

Fixed up by Jeff Moyer to only use/evaluate inode->i_blkbits once,
to avoid issues with block size changes with IO in flight.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/direct-io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index aeae8c063451..c87bae4376b8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -906,6 +906,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 			struct buffer_head *map_bh)
 {
 	const unsigned blkbits = sdio->blkbits;
+	const unsigned i_blkbits = blkbits + sdio->blkfactor;
 	int ret = 0;
 
 	while (sdio->block_in_file < sdio->final_block_in_request) {
@@ -949,7 +950,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 					clean_bdev_aliases(
 						map_bh->b_bdev,
 						map_bh->b_blocknr,
-						map_bh->b_size >> blkbits);
+						map_bh->b_size >> i_blkbits);
 				}
 
 				if (!sdio->blkfactor)

From a14d749fcebe97ddf6af6db3d1f6ece85c9ddcb9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 08:56:23 -0700
Subject: [PATCH 180/297] virtio_blk: avoid DMA to stack for the sense buffer

Most users of BLOCK_PC requests allocate the sense buffer on the stack,
so to avoid DMA to the stack copy them to a field in the heap allocated
virtblk_req structure.  Without that any attempt at SCSI passthrough I/O,
including the SG_IO ioctl from userspace will crash the kernel.  Note that
this includes running tools like hdparm even when the host does not have
SCSI passthrough enabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/virtio_blk.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5545a679abd8..3c3b8f601469 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -56,6 +56,7 @@ struct virtblk_req {
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
 	u8 status;
+	u8 sense[SCSI_SENSE_BUFFERSIZE];
 	struct scatterlist sg[];
 };
 
@@ -102,7 +103,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	}
 
 	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
-		sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
+		memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
+		sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE);
 		sgs[num_out + num_in++] = &sense;
 		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
 		sgs[num_out + num_in++] = &inhdr;

From 25b4acfc7de0fc4da3bfea3a316f7282c6fbde81 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Mon, 9 Jan 2017 15:20:31 -0500
Subject: [PATCH 181/297] nbd: blk_mq_init_queue returns an error code on
 failure, not NULL

Additionally, don't assign directly to disk->queue, otherwise
blk_put_queue (called via put_disk) will choke (panic) on the errno
stored there.

Bug found by code inspection after Omar found a similar issue in
virtio_blk.  Compile-tested only.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nbd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 38c576f76d36..50a2020b5b72 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1042,6 +1042,7 @@ static int __init nbd_init(void)
 		return -ENOMEM;
 
 	for (i = 0; i < nbds_max; i++) {
+		struct request_queue *q;
 		struct gendisk *disk = alloc_disk(1 << part_shift);
 		if (!disk)
 			goto out;
@@ -1067,12 +1068,13 @@ static int __init nbd_init(void)
 		 * every gendisk to have its very own request_queue struct.
 		 * These structs are big so we dynamically allocate them.
 		 */
-		disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
-		if (!disk->queue) {
+		q = blk_mq_init_queue(&nbd_dev[i].tag_set);
+		if (IS_ERR(q)) {
 			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 			put_disk(disk);
 			goto out;
 		}
+		disk->queue = q;
 
 		/*
 		 * Tell the block layer that we are not a rotational device

From 6bf6b0aa3da84a3d9126919a94c49c0fb7ee2fb3 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 9 Jan 2017 11:44:12 -0800
Subject: [PATCH 182/297] virtio_blk: fix panic in initialization error path

If blk_mq_init_queue() returns an error, it gets assigned to
vblk->disk->queue. Then, when we call put_disk(), we end up calling
blk_put_queue() with the ERR_PTR, causing a bad dereference. Fix it by
only assigning to vblk->disk->queue on success.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/virtio_blk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 3c3b8f601469..10332c24f961 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -630,11 +630,12 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_put_disk;
 
-	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
+	q = blk_mq_init_queue(&vblk->tag_set);
 	if (IS_ERR(q)) {
 		err = -ENOMEM;
 		goto out_free_tags;
 	}
+	vblk->disk->queue = q;
 
 	q->queuedata = vblk;
 

From faf3a932fbeb77860226a8323eacb835edc98648 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 9 Jan 2017 11:58:34 -0800
Subject: [PATCH 183/297] net: dsa: Ensure validity of dst->ds[0]

It is perfectly possible to have non zero indexed switches being present
in a DSA switch tree, in such a case, we will be deferencing a NULL
pointer while dsa_cpu_port_ethtool_{setup,restore}. Be more defensive
and ensure that dst->ds[0] is valid before doing anything with it.

Fixes: 0c73c523cf73 ("net: dsa: Initialize CPU port ethtool ops per tree")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa2.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 5fff951a0a49..da3862124545 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -394,9 +394,11 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
 			return err;
 	}
 
-	err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
-	if (err)
-		return err;
+	if (dst->ds[0]) {
+		err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
+		if (err)
+			return err;
+	}
 
 	/* If we use a tagging format that doesn't have an ethertype
 	 * field, make sure that all packets from this point on get
@@ -433,7 +435,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
 		dsa_ds_unapply(dst, ds);
 	}
 
-	dsa_cpu_port_ethtool_restore(dst->ds[0]);
+	if (dst->ds[0])
+		dsa_cpu_port_ethtool_restore(dst->ds[0]);
 
 	pr_info("DSA: tree %d unapplied\n", dst->tree);
 	dst->applied = false;

From 3512a1ad56174308a9fd3e10f4b1e3e152e9ec01 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Mon, 9 Jan 2017 14:31:58 -0800
Subject: [PATCH 184/297] net: qrtr: Mark 'buf' as little endian

Failure to mark this pointer as __le32 causes checkers like
sparse to complain:

net/qrtr/qrtr.c:274:16: warning: incorrect type in assignment (different base types)
net/qrtr/qrtr.c:274:16:    expected unsigned int [unsigned] [usertype] <noident>
net/qrtr/qrtr.c:274:16:    got restricted __le32 [usertype] <noident>
net/qrtr/qrtr.c:275:16: warning: incorrect type in assignment (different base types)
net/qrtr/qrtr.c:275:16:    expected unsigned int [unsigned] [usertype] <noident>
net/qrtr/qrtr.c:275:16:    got restricted __le32 [usertype] <noident>
net/qrtr/qrtr.c:276:16: warning: incorrect type in assignment (different base types)
net/qrtr/qrtr.c:276:16:    expected unsigned int [unsigned] [usertype] <noident>
net/qrtr/qrtr.c:276:16:    got restricted __le32 [usertype] <noident>

Silence it.

Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c985ecbe9bd6..ae5ac175b2be 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -252,7 +252,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
 	const int pkt_len = 20;
 	struct qrtr_hdr *hdr;
 	struct sk_buff *skb;
-	u32 *buf;
+	__le32 *buf;
 
 	skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
 	if (!skb)
@@ -269,7 +269,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
 	hdr->dst_node_id = cpu_to_le32(dst_node);
 	hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
 
-	buf = (u32 *)skb_put(skb, pkt_len);
+	buf = (__le32 *)skb_put(skb, pkt_len);
 	memset(buf, 0, pkt_len);
 	buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
 	buf[1] = cpu_to_le32(src_node);

From 5d722b3024f6762addb8642ffddc9f275b5107ae Mon Sep 17 00:00:00 2001
From: "Anna, Suman" <s-anna@ti.com>
Date: Mon, 9 Jan 2017 21:48:56 -0600
Subject: [PATCH 185/297] net: add the AF_QIPCRTR entries to family name tables

Commit bdabad3e363d ("net: Add Qualcomm IPC router") introduced a
new address family. Update the family name tables accordingly so
that the lockdep initialization can use the proper names for this
family.

Cc: Courtney Cavin <courtney.cavin@sonymobile.com>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index f560e0826009..4eca27dc5c94 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -222,7 +222,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
   "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
   "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_KCM"      ,
-  "sk_lock-AF_MAX"
+  "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX"
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
@@ -239,7 +239,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
   "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
   "slock-AF_NFC"   , "slock-AF_VSOCK"    ,"slock-AF_KCM"       ,
-  "slock-AF_MAX"
+  "slock-AF_QIPCRTR", "slock-AF_MAX"
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
@@ -256,7 +256,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
   "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
   "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_KCM"      ,
-  "clock-AF_MAX"
+  "clock-AF_QIPCRTR", "clock-AF_MAX"
 };
 
 /*

From dc5367bcc556e97555fc94a32cd1aadbebdff47e Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Tue, 10 Jan 2017 17:10:34 +0100
Subject: [PATCH 186/297] net/af_iucv: don't use paged skbs for TX on
 HiperSockets

With commit e53743994e21
("af_iucv: use paged SKBs for big outbound messages"),
we transmit paged skbs for both of AF_IUCV's transport modes
(IUCV or HiperSockets).
The qeth driver for Layer 3 HiperSockets currently doesn't
support NETIF_F_SG, so these skbs would just be linearized again
by the stack.
Avoid that overhead by using paged skbs only for IUCV transport.

cc stable, since this also circumvents a significant skb leak when
sending large messages (where the skb then needs to be linearized).

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org> # v4.8+
Fixes: e53743994e21 ("af_iucv: use paged SKBs for big outbound messages")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index cfb9e5f4e28f..13190b38f22e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1044,7 +1044,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 {
 	struct sock *sk = sock->sk;
 	struct iucv_sock *iucv = iucv_sk(sk);
-	size_t headroom, linear;
+	size_t headroom = 0;
+	size_t linear;
 	struct sk_buff *skb;
 	struct iucv_message txmsg = {0};
 	struct cmsghdr *cmsg;
@@ -1122,18 +1123,20 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 	 * this is fine for SOCK_SEQPACKET (unless we want to support
 	 * segmented records using the MSG_EOR flag), but
 	 * for SOCK_STREAM we might want to improve it in future */
-	headroom = (iucv->transport == AF_IUCV_TRANS_HIPER)
-		   ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0;
-	if (headroom + len < PAGE_SIZE) {
+	if (iucv->transport == AF_IUCV_TRANS_HIPER) {
+		headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
 		linear = len;
 	} else {
-		/* In nonlinear "classic" iucv skb,
-		 * reserve space for iucv_array
-		 */
-		if (iucv->transport != AF_IUCV_TRANS_HIPER)
-			headroom += sizeof(struct iucv_array) *
-				    (MAX_SKB_FRAGS + 1);
-		linear = PAGE_SIZE - headroom;
+		if (len < PAGE_SIZE) {
+			linear = len;
+		} else {
+			/* In nonlinear "classic" iucv skb,
+			 * reserve space for iucv_array
+			 */
+			headroom = sizeof(struct iucv_array) *
+				   (MAX_SKB_FRAGS + 1);
+			linear = PAGE_SIZE - headroom;
+		}
 	}
 	skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear,
 				   noblock, &err, 0);

From 9f9b74ef896792399dc7b5121896b9c963db80fb Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 10 Jan 2017 09:41:49 -0800
Subject: [PATCH 187/297] mlx4: Return EOPNOTSUPP instead of ENOTSUPP

In commit b45f0674b997 ("mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs"),
it changed EOPNOTSUPP to ENOTSUPP by mistake.  This patch fixes it.

Fixes: b45f0674b997 ("mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index edbe200ac2fa..4910d9af1933 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2277,7 +2277,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 
 	if (priv->tx_ring_num[TX_XDP] &&
 	    !mlx4_en_check_xdp_mtu(dev, new_mtu))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	dev->mtu = new_mtu;
 

From 1272ce87fa017ca4cf32920764d879656b7a005a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 10 Jan 2017 12:24:01 -0800
Subject: [PATCH 188/297] gro: Enter slow-path if there is no tailroom

The GRO path has a fast-path where we avoid calling pskb_may_pull
and pskb_expand by directly accessing frag0.  However, this should
only be done if we have enough tailroom in the skb as otherwise
we'll have to expand it later anyway.

This patch adds the check by capping frag0_len with the skb tailroom.

Fixes: cb18978cbf45 ("gro: Open-code final pskb_may_pull")
Reported-by: Slava Shwartsman <slavash@mellanox.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 8db5a0b4b520..88d2907ca2cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4441,7 +4441,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
 	    pinfo->nr_frags &&
 	    !PageHighMem(skb_frag_page(frag0))) {
 		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
-		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
+		NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0),
+						  skb->end - skb->tail);
 	}
 }
 

From 57ea52a865144aedbcd619ee0081155e658b6f7d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 10 Jan 2017 12:24:15 -0800
Subject: [PATCH 189/297] gro: Disable frag0 optimization on IPv6 ext headers

The GRO fast path caches the frag0 address.  This address becomes
invalid if frag0 is modified by pskb_may_pull or its variants.
So whenever that happens we must disable the frag0 optimization.

This is usually done through the combination of gro_header_hard
and gro_header_slow, however, the IPv6 extension header path did
the pulling directly and would continue to use the GRO fast path
incorrectly.

This patch fixes it by disabling the fast path when we enter the
IPv6 extension header path.

Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address")
Reported-by: Slava Shwartsman <slavash@mellanox.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 9 +++++++--
 net/ipv6/ip6_offload.c    | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f7423a74b..9bde9558b596 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2477,14 +2477,19 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
 	return NAPI_GRO_CB(skb)->frag0_len < hlen;
 }
 
+static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
+{
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+}
+
 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
 					unsigned int offset)
 {
 	if (!pskb_may_pull(skb, hlen))
 		return NULL;
 
-	NAPI_GRO_CB(skb)->frag0 = NULL;
-	NAPI_GRO_CB(skb)->frag0_len = 0;
+	skb_gro_frag0_invalidate(skb);
 	return skb->data + offset;
 }
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 89c59e656f44..fc7b4017ba24 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	ops = rcu_dereference(inet6_offloads[proto]);
 	if (!ops || !ops->callbacks.gro_receive) {
 		__pskb_pull(skb, skb_gro_offset(skb));
+		skb_gro_frag0_invalidate(skb);
 		proto = ipv6_gso_pull_exthdrs(skb, proto);
 		skb_gro_pull(skb, -skb_transport_offset(skb));
 		skb_reset_transport_header(skb);

From 5771f6ea8d5ccc0df4d02ae65833413150a1b829 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 10 Jan 2017 16:57:12 -0800
Subject: [PATCH 190/297] MAINTAINERS: remove duplicate bug filling description

I have noticed that two different descriptions for B: entries in
MAINTAINERS were merged: commit 686564434e88 ("MAINTAINERS: Add bug
tracking system location entry type") and 2de2bd95f456 ("MAINTAINERS:
add "B:" for URI where to file bugs").

This patch keeps the description from 2de2bd95f456.  There has been a
discussion [1] about whether this more detailed description is useful
and what it exactly implies.  I find it more useful and general, and the
author of 686564434e88 agreed in the end that either is fine.

[1] https://lkml.org/lkml/2016/12/8/71

Link: http://lkml.kernel.org/r/20161219085158.12114-1-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 35c9cbfe4f2d..0277df881da4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -81,7 +81,6 @@ Descriptions of section entries:
 	Q: Patchwork web based patch tracking system site
 	T: SCM tree type and location.
 	   Type is one of: git, hg, quilt, stgit, topgit
-	B: Bug tracking system location.
 	S: Status, one of the following:
 	   Supported:	Someone is actually paid to look after this.
 	   Maintained:	Someone actually looks after it.

From 965d004af54088d138f806d04d803fb60d441986 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 10 Jan 2017 16:57:15 -0800
Subject: [PATCH 191/297] dax: fix deadlock with DAX 4k holes

Currently in DAX if we have three read faults on the same hole address we
can end up with the following:

Thread 0		Thread 1		Thread 2
--------		--------		--------
dax_iomap_fault
 grab_mapping_entry
  lock_slot
   <locks empty DAX entry>

  			dax_iomap_fault
			 grab_mapping_entry
			  get_unlocked_mapping_entry
			   <sleeps on empty DAX entry>

						dax_iomap_fault
						 grab_mapping_entry
						  get_unlocked_mapping_entry
						   <sleeps on empty DAX entry>
  dax_load_hole
   find_or_create_page
   ...
    page_cache_tree_insert
     dax_wake_mapping_entry_waiter
      <wakes one sleeper>
     __radix_tree_replace
      <swaps empty DAX entry with 4k zero page>

			<wakes>
			get_page
			lock_page
			...
			put_locked_mapping_entry
			unlock_page
			put_page

						<sleeps forever on the DAX
						 wait queue>

The crux of the problem is that once we insert a 4k zero page, all
locking from then on is done in terms of that 4k zero page and any
additional threads sleeping on the empty DAX entry will never be woken.

Fix this by waking all sleepers when we replace the DAX radix tree entry
with a 4k zero page.  This will allow all sleeping threads to
successfully transition from locking based on the DAX empty entry to
locking on the 4k zero page.

With the test case reported by Xiong this happens very regularly in my
test setup, with some runs resulting in 9+ threads in this deadlocked
state.  With this fix I've been able to run that same test dozens of
times in a loop without issue.

Fixes: ac401cc78242 ("dax: New fault locking")
Link: http://lkml.kernel.org/r/1483479365-13607-1-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reported-by: Xiong Zhou <xzhou@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: <stable@vger.kernel.org>	[4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index d0e4d1002059..b772a33ef640 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -138,7 +138,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 				dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
 			/* Wakeup waiters for exceptional entry lock */
 			dax_wake_mapping_entry_waiter(mapping, page->index, p,
-						      false);
+						      true);
 		}
 	}
 	__radix_tree_replace(&mapping->page_tree, node, slot, page,

From d670ffd87509b6b136d8ed6f757851a8ebe442b2 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 10 Jan 2017 16:57:18 -0800
Subject: [PATCH 192/297] mm/thp/pagecache/collapse: free the pte page table on
 collapse for thp page cache.

With THP page cache, when trying to build a huge page from regular pte
pages, we just clear the pmd entry.  We will take another fault and at
that point we will find the huge page in the radix tree, thereby using
the huge page to complete the page fault

The second fault path will allocate the needed pgtable_t page for archs
like ppc64.  So no need to deposit the same in collapse path.
Depositing them in the collapse path resulting in a pgtable_t memory
leak also giving errors like

  BUG: non-zero nr_ptes on freeing mm: 3

Fixes: 953c66c2b22a ("mm: THP page cache support for ppc64")
Link: http://lkml.kernel.org/r/20161212163428.6780-2-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e32389a97030..b0924a68cc36 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1242,7 +1242,6 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 	struct vm_area_struct *vma;
 	unsigned long addr;
 	pmd_t *pmd, _pmd;
-	bool deposited = false;
 
 	i_mmap_lock_write(mapping);
 	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
@@ -1267,26 +1266,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 			spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
 			/* assume page table is clear */
 			_pmd = pmdp_collapse_flush(vma, addr, pmd);
-			/*
-			 * now deposit the pgtable for arch that need it
-			 * otherwise free it.
-			 */
-			if (arch_needs_pgtable_deposit()) {
-				/*
-				 * The deposit should be visibile only after
-				 * collapse is seen by others.
-				 */
-				smp_wmb();
-				pgtable_trans_huge_deposit(vma->vm_mm, pmd,
-							   pmd_pgtable(_pmd));
-				deposited = true;
-			}
 			spin_unlock(ptl);
 			up_write(&vma->vm_mm->mmap_sem);
-			if (!deposited) {
-				atomic_long_dec(&vma->vm_mm->nr_ptes);
-				pte_free(vma->vm_mm, pmd_pgtable(_pmd));
-			}
+			atomic_long_dec(&vma->vm_mm->nr_ptes);
+			pte_free(vma->vm_mm, pmd_pgtable(_pmd));
 		}
 	}
 	i_mmap_unlock_write(mapping);

From 097963959594c5eccaba42510f7033f703211bda Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 10 Jan 2017 16:57:21 -0800
Subject: [PATCH 193/297] mm: add follow_pte_pmd()

Patch series "Write protect DAX PMDs in *sync path".

Currently dax_mapping_entry_mkclean() fails to clean and write protect
the pmd_t of a DAX PMD entry during an *sync operation.  This can result
in data loss, as detailed in patch 2.

This series is based on Dan's "libnvdimm-pending" branch, which is the
current home for Jan's "dax: Page invalidation fixes" series.  You can
find a working tree here:

  https://git.kernel.org/cgit/linux/kernel/git/zwisler/linux.git/log/?h=dax_pmd_clean

This patch (of 2):

Similar to follow_pte(), follow_pte_pmd() allows either a PTE leaf or a
huge page PMD leaf to be found and returned.

Link: http://lkml.kernel.org/r/1482272586-21177-2-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 37 ++++++++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fe6b4036664a..02793ac64ac6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1212,6 +1212,8 @@ void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
 	       spinlock_t **ptlp);
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+			     pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 9f2c15cdb32c..b62f3bc63481 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3772,8 +3772,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static int __follow_pte(struct mm_struct *mm, unsigned long address,
-		pte_t **ptepp, spinlock_t **ptlp)
+static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+		pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -3790,11 +3790,20 @@ static int __follow_pte(struct mm_struct *mm, unsigned long address,
 
 	pmd = pmd_offset(pud, address);
 	VM_BUG_ON(pmd_trans_huge(*pmd));
-	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-		goto out;
 
-	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
-	if (pmd_huge(*pmd))
+	if (pmd_huge(*pmd)) {
+		if (!pmdpp)
+			goto out;
+
+		*ptlp = pmd_lock(mm, pmd);
+		if (pmd_huge(*pmd)) {
+			*pmdpp = pmd;
+			return 0;
+		}
+		spin_unlock(*ptlp);
+	}
+
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
 		goto out;
 
 	ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
@@ -3817,10 +3826,24 @@ int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
 
 	/* (void) is needed to make gcc happy */
 	(void) __cond_lock(*ptlp,
-			   !(res = __follow_pte(mm, address, ptepp, ptlp)));
+			   !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
+					   ptlp)));
 	return res;
 }
 
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+			     pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+{
+	int res;
+
+	/* (void) is needed to make gcc happy */
+	(void) __cond_lock(*ptlp,
+			   !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
+					   ptlp)));
+	return res;
+}
+EXPORT_SYMBOL(follow_pte_pmd);
+
 /**
  * follow_pfn - look up PFN at a user virtual address
  * @vma: memory mapping

From f729c8c9b24f0540a6e6b86e68f3888ba90ef7e7 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 10 Jan 2017 16:57:24 -0800
Subject: [PATCH 194/297] dax: wrprotect pmd_t in dax_mapping_entry_mkclean

Currently dax_mapping_entry_mkclean() fails to clean and write protect
the pmd_t of a DAX PMD entry during an *sync operation.  This can result
in data loss in the following sequence:

1) mmap write to DAX PMD, dirtying PMD radix tree entry and making the
   pmd_t dirty and writeable
2) fsync, flushing out PMD data and cleaning the radix tree entry. We
   currently fail to mark the pmd_t as clean and write protected.
3) more mmap writes to the PMD.  These don't cause any page faults since
   the pmd_t is dirty and writeable.  The radix tree entry remains clean.
4) fsync, which fails to flush the dirty PMD data because the radix tree
   entry was clean.
5) crash - dirty data that should have been fsync'd as part of 4) could
   still have been in the processor cache, and is lost.

Fix this by marking the pmd_t clean and write protected in
dax_mapping_entry_mkclean(), which is called as part of the fsync
operation 2).  This will cause the writes in step 3) above to generate
page faults where we'll re-dirty the PMD radix tree entry, resulting in
flushes in the fsync that happens in step 4).

Fixes: 4b4bb46d00b3 ("dax: clear dirty entry tags on cache flush")
Link: http://lkml.kernel.org/r/1482272586-21177-3-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c           | 51 ++++++++++++++++++++++++++++++++--------------
 include/linux/mm.h |  2 --
 mm/memory.c        |  4 ++--
 3 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 5c74f60d0a50..ddcddfeaa03b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -691,8 +691,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 				      pgoff_t index, unsigned long pfn)
 {
 	struct vm_area_struct *vma;
-	pte_t *ptep;
-	pte_t pte;
+	pte_t pte, *ptep = NULL;
+	pmd_t *pmdp = NULL;
 	spinlock_t *ptl;
 	bool changed;
 
@@ -707,21 +707,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 
 		address = pgoff_address(index, vma);
 		changed = false;
-		if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+		if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
 			continue;
-		if (pfn != pte_pfn(*ptep))
-			goto unlock;
-		if (!pte_dirty(*ptep) && !pte_write(*ptep))
-			goto unlock;
 
-		flush_cache_page(vma, address, pfn);
-		pte = ptep_clear_flush(vma, address, ptep);
-		pte = pte_wrprotect(pte);
-		pte = pte_mkclean(pte);
-		set_pte_at(vma->vm_mm, address, ptep, pte);
-		changed = true;
-unlock:
-		pte_unmap_unlock(ptep, ptl);
+		if (pmdp) {
+#ifdef CONFIG_FS_DAX_PMD
+			pmd_t pmd;
+
+			if (pfn != pmd_pfn(*pmdp))
+				goto unlock_pmd;
+			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+				goto unlock_pmd;
+
+			flush_cache_page(vma, address, pfn);
+			pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+			pmd = pmd_wrprotect(pmd);
+			pmd = pmd_mkclean(pmd);
+			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+			changed = true;
+unlock_pmd:
+			spin_unlock(ptl);
+#endif
+		} else {
+			if (pfn != pte_pfn(*ptep))
+				goto unlock_pte;
+			if (!pte_dirty(*ptep) && !pte_write(*ptep))
+				goto unlock_pte;
+
+			flush_cache_page(vma, address, pfn);
+			pte = ptep_clear_flush(vma, address, ptep);
+			pte = pte_wrprotect(pte);
+			pte = pte_mkclean(pte);
+			set_pte_at(vma->vm_mm, address, ptep, pte);
+			changed = true;
+unlock_pte:
+			pte_unmap_unlock(ptep, ptl);
+		}
 
 		if (changed)
 			mmu_notifier_invalidate_page(vma->vm_mm, address);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 02793ac64ac6..b84615b0f64c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1210,8 +1210,6 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
-int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
-	       spinlock_t **ptlp);
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 			     pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index b62f3bc63481..6bf2b471e30c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3819,8 +3819,8 @@ out:
 	return -EINVAL;
 }
 
-int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
-	       spinlock_t **ptlp)
+static inline int follow_pte(struct mm_struct *mm, unsigned long address,
+			     pte_t **ptepp, spinlock_t **ptlp)
 {
 	int res;
 

From bb1107f7c6052c863692a41f78c000db792334bf Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:57:27 -0800
Subject: [PATCH 195/297] mm, slab: make sure that KMALLOC_MAX_SIZE will fit
 into MAX_ORDER

Andrey Konovalov has reported the following warning triggered by the
syzkaller fuzzer.

  WARNING: CPU: 1 PID: 9935 at mm/page_alloc.c:3511 __alloc_pages_nodemask+0x159c/0x1e20
  Kernel panic - not syncing: panic_on_warn set ...
  CPU: 1 PID: 9935 Comm: syz-executor0 Not tainted 4.9.0-rc7+ #34
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  Call Trace:
    __alloc_pages_slowpath mm/page_alloc.c:3511
    __alloc_pages_nodemask+0x159c/0x1e20 mm/page_alloc.c:3781
    alloc_pages_current+0x1c7/0x6b0 mm/mempolicy.c:2072
    alloc_pages include/linux/gfp.h:469
    kmalloc_order+0x1f/0x70 mm/slab_common.c:1015
    kmalloc_order_trace+0x1f/0x160 mm/slab_common.c:1026
    kmalloc_large include/linux/slab.h:422
    __kmalloc+0x210/0x2d0 mm/slub.c:3723
    kmalloc include/linux/slab.h:495
    ep_write_iter+0x167/0xb50 drivers/usb/gadget/legacy/inode.c:664
    new_sync_write fs/read_write.c:499
    __vfs_write+0x483/0x760 fs/read_write.c:512
    vfs_write+0x170/0x4e0 fs/read_write.c:560
    SYSC_write fs/read_write.c:607
    SyS_write+0xfb/0x230 fs/read_write.c:599
    entry_SYSCALL_64_fastpath+0x1f/0xc2

The issue is caused by a lack of size check for the request size in
ep_write_iter which should be fixed.  It, however, points to another
problem, that SLUB defines KMALLOC_MAX_SIZE too large because the its
KMALLOC_SHIFT_MAX is (MAX_ORDER + PAGE_SHIFT) which means that the
resulting page allocator request might be MAX_ORDER which is too large
(see __alloc_pages_slowpath).

The same applies to the SLOB allocator which allows even larger sizes.
Make sure that they are capped properly and never request more than
MAX_ORDER order.

Link: http://lkml.kernel.org/r/20161220130659.16461-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 084b12bad198..4c5363566815 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -226,7 +226,7 @@ static inline const char *__check_heap_object(const void *ptr,
  * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
  */
 #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
-#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT)
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	3
 #endif
@@ -239,7 +239,7 @@ static inline const char *__check_heap_object(const void *ptr,
  * be allocated from the same page.
  */
 #define KMALLOC_SHIFT_HIGH	PAGE_SHIFT
-#define KMALLOC_SHIFT_MAX	30
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	3
 #endif

From 7984c27c2c5cd3298de8afdba3e1bd46f884e934 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:57:30 -0800
Subject: [PATCH 196/297] bpf: do not use KMALLOC_SHIFT_MAX

Commit 01b3f52157ff ("bpf: fix allocation warnings in bpf maps and
integer overflow") has added checks for the maximum allocateable size.
It (ab)used KMALLOC_SHIFT_MAX for that purpose.

While this is not incorrect it is not very clean because we already have
KMALLOC_MAX_SIZE for this very reason so let's change both checks to use
KMALLOC_MAX_SIZE instead.

The original motivation for using KMALLOC_SHIFT_MAX was to work around
an incorrect KMALLOC_MAX_SIZE which could lead to allocation warnings
but it is no longer needed since "slab: make sure that KMALLOC_MAX_SIZE
will fit into MAX_ORDER".

Link: http://lkml.kernel.org/r/20161220130659.16461-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/bpf/arraymap.c | 2 +-
 kernel/bpf/hashtab.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index a2ac051c342f..229a5d5df977 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -56,7 +56,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	    attr->value_size == 0 || attr->map_flags)
 		return ERR_PTR(-EINVAL);
 
-	if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
+	if (attr->value_size > KMALLOC_MAX_SIZE)
 		/* if value_size is bigger, the user space won't be able to
 		 * access the elements.
 		 */
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 34debc1a9641..3f2bb58952d8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -274,7 +274,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		 */
 		goto free_htab;
 
-	if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
+	if (htab->map.value_size >= KMALLOC_MAX_SIZE -
 	    MAX_BPF_STACK - sizeof(struct htab_elem))
 		/* if value_size is bigger, the user space won't be able to
 		 * access the elements via bpf syscall. This check also makes

From e7ee2c089e94067d68475990bdeed211c8852917 Mon Sep 17 00:00:00 2001
From: Eric Ren <zren@suse.com>
Date: Tue, 10 Jan 2017 16:57:33 -0800
Subject: [PATCH 197/297] ocfs2: fix crash caused by stale lvb with fsdlm
 plugin

The crash happens rather often when we reset some cluster nodes while
nodes contend fiercely to do truncate and append.

The crash backtrace is below:

   dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 locks on 971 resources
   dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 generation 5 done: 4 ms
   ocfs2: Begin replay journal (node 318952601, slot 2) on device (253,18)
   ocfs2: End replay journal (node 318952601, slot 2) on device (253,18)
   ocfs2: Beginning quota recovery on device (253,18) for slot 2
   ocfs2: Finishing quota recovery on device (253,18) for slot 2
   (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug expression: le64_to_cpu(fe->i_size) != i_size_read(inode)
   (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1
   ------------[ cut here ]------------
   kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470!
   invalid opcode: 0000 [#1] SMP
   Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse sd_mod    iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr parport      joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix               drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio ehci_hcd       usbcore serio_raw usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
   Supported: No, Unsupported modules are loaded
   CPU: 1 PID: 30154 Comm: truncate Tainted: G           OE   N  4.4.21-69-default #1
   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
   task: ffff88004ff6d240 ti: ffff880074e68000 task.ti: ffff880074e68000
   RIP: 0010:[<ffffffffa05c8c30>]  [<ffffffffa05c8c30>] ocfs2_truncate_file+0x640/0x6c0 [ocfs2]
   RSP: 0018:ffff880074e6bd50  EFLAGS: 00010282
   RAX: 0000000000000074 RBX: 000000000000029e RCX: 0000000000000000
   RDX: 0000000000000001 RSI: 0000000000000246 RDI: 0000000000000246
   RBP: ffff880074e6bda8 R08: 000000003675dc7a R09: ffffffff82013414
   R10: 0000000000034c50 R11: 0000000000000000 R12: ffff88003aab3448
   R13: 00000000000002dc R14: 0000000000046e11 R15: 0000000000000020
   FS:  00007f839f965700(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000
   CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
   CR2: 00007f839f97e000 CR3: 0000000036723000 CR4: 00000000000006e0
   Call Trace:
     ocfs2_setattr+0x698/0xa90 [ocfs2]
     notify_change+0x1ae/0x380
     do_truncate+0x5e/0x90
     do_sys_ftruncate.constprop.11+0x108/0x160
     entry_SYSCALL_64_fastpath+0x12/0x6d
   Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff
   RIP  [<ffffffffa05c8c30>] ocfs2_truncate_file+0x640/0x6c0 [ocfs2]

It's because ocfs2_inode_lock() get us stale LVB in which the i_size is
not equal to the disk i_size.  We mistakenly trust the LVB because the
underlaying fsdlm dlm_lock() doesn't set lkb_sbflags with
DLM_SBF_VALNOTVALID properly for us.  But, why?

The current code tries to downconvert lock without DLM_LKF_VALBLK flag
to tell o2cb don't update RSB's LVB if it's a PR->NULL conversion, even
if the lock resource type needs LVB.  This is not the right way for
fsdlm.

The fsdlm plugin behaves different on DLM_LKF_VALBLK, it depends on
DLM_LKF_VALBLK to decide if we care about the LVB in the LKB.  If
DLM_LKF_VALBLK is not set, fsdlm will skip recovering RSB's LVB from
this lkb and set the right DLM_SBF_VALNOTVALID appropriately when node
failure happens.

The following diagram briefly illustrates how this crash happens:

RSB1 is inode metadata lock resource with LOCK_TYPE_USES_LVB;

The 1st round:

             Node1                                    Node2
RSB1: PR
                                                  RSB1(master): NULL->EX
ocfs2_downconvert_lock(PR->NULL, set_lvb==0)
  ocfs2_dlm_lock(no DLM_LKF_VALBLK)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

dlm_lock(no DLM_LKF_VALBLK)
  convert_lock(overwrite lkb->lkb_exflags
               with no DLM_LKF_VALBLK)

RSB1: NULL                                        RSB1: EX
                                                  reset Node2
dlm_recover_rsbs()
  recover_lvb()

/* The LVB is not trustable if the node with EX fails and
 * no lock >= PR is left. We should set RSB_VALNOTVALID for RSB1.
 */

 if(!(kb_exflags & DLM_LKF_VALBLK)) /* This means we miss the chance to
           return;                   * to invalid the LVB here.
                                     */

The 2nd round:

         Node 1                                Node2
RSB1(become master from recovery)

ocfs2_setattr()
  ocfs2_inode_lock(NULL->EX)
    /* dlm_lock() return the stale lvb without setting DLM_SBF_VALNOTVALID */
    ocfs2_meta_lvb_is_trustable() return 1 /* so we don't refresh inode from disk */
  ocfs2_truncate_file()
      mlog_bug_on_msg(disk isize != i_size_read(inode))  /* crash! */

The fix is quite straightforward.  We keep to set DLM_LKF_VALBLK flag
for dlm_lock() if the lock resource type needs LVB and the fsdlm plugin
is uesed.

Link: http://lkml.kernel.org/r/1481275846-6604-1-git-send-email-zren@suse.com
Signed-off-by: Eric Ren <zren@suse.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlmglue.c   | 10 ++++++++++
 fs/ocfs2/stackglue.c |  6 ++++++
 fs/ocfs2/stackglue.h |  3 +++
 3 files changed, 19 insertions(+)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 83d576f6a287..77d1632e905d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
 	     lockres->l_level, new_level);
 
+	/*
+	 * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
+	 * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
+	 * we can recover correctly from node failure. Otherwise, we may get
+	 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
+	 */
+	if (!ocfs2_is_o2cb_active() &&
+	    lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
+		lvb = 1;
+
 	if (lvb)
 		dlm_flags |= DLM_LKF_VALBLK;
 
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 52c07346bea3..820359096c7a 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
  */
 static struct ocfs2_stack_plugin *active_stack;
 
+inline int ocfs2_is_o2cb_active(void)
+{
+	return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
+}
+EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
+
 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
 {
 	struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index f2dce10fae54..e3036e1790e8 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
 
+/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
+int ocfs2_is_o2cb_active(void);
+
 extern struct kset *ocfs2_kset;
 
 #endif  /* STACKGLUE_H */

From f931ab479dd24cf7a2c6e2df19778406892591fb Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 10 Jan 2017 16:57:36 -0800
Subject: [PATCH 198/297] mm: fix devm_memremap_pages crash, use
 mem_hotplug_{begin, done}

Both arch_add_memory() and arch_remove_memory() expect a single threaded
context.

For example, arch/x86/mm/init_64.c::kernel_physical_mapping_init() does
not hold any locks over this check and branch:

    if (pgd_val(*pgd)) {
    	pud = (pud_t *)pgd_page_vaddr(*pgd);
    	paddr_last = phys_pud_init(pud, __pa(vaddr),
    				   __pa(vaddr_end),
    				   page_size_mask);
    	continue;
    }

    pud = alloc_low_page();
    paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
    			   page_size_mask);

The result is that two threads calling devm_memremap_pages()
simultaneously can end up colliding on pgd initialization.  This leads
to crash signatures like the following where the loser of the race
initializes the wrong pgd entry:

    BUG: unable to handle kernel paging request at ffff888ebfff0000
    IP: memcpy_erms+0x6/0x10
    PGD 2f8e8fc067 PUD 0 /* <---- Invalid PUD */
    Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
    CPU: 54 PID: 3818 Comm: systemd-udevd Not tainted 4.6.7+ #13
    task: ffff882fac290040 ti: ffff882f887a4000 task.ti: ffff882f887a4000
    RIP: memcpy_erms+0x6/0x10
    [..]
    Call Trace:
      ? pmem_do_bvec+0x205/0x370 [nd_pmem]
      ? blk_queue_enter+0x3a/0x280
      pmem_rw_page+0x38/0x80 [nd_pmem]
      bdev_read_page+0x84/0xb0

Hold the standard memory hotplug mutex over calls to
arch_{add,remove}_memory().

Fixes: 41e94a851304 ("add devm_memremap_pages")
Link: http://lkml.kernel.org/r/148357647831.9498.12606007370121652979.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/memremap.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/memremap.c b/kernel/memremap.c
index b501e390bb34..9ecedc28b928 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -246,7 +246,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 	/* pages are dead and unused, undo the arch mapping */
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	mem_hotplug_begin();
 	arch_remove_memory(align_start, align_size);
+	mem_hotplug_done();
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
 	pgmap_radix_release(res);
 	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
@@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (error)
 		goto err_pfn_remap;
 
+	mem_hotplug_begin();
 	error = arch_add_memory(nid, align_start, align_size, true);
+	mem_hotplug_done();
 	if (error)
 		goto err_add_memory;
 

From 2df26639e708a88dcc22171949da638a9998f3bc Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:57:39 -0800
Subject: [PATCH 199/297] mm: fix remote numa hits statistics

Jia He has noticed that commit b9f00e147f27 ("mm, page_alloc: reduce
branches in zone_statistics") has an unintentional side effect that
remote node allocation requests are accounted as NUMA_MISS rathat than
NUMA_HIT and NUMA_OTHER if such a request doesn't use __GFP_OTHER_NODE.

There are many of these potentially because the flag is used very rarely
while we have many users of __alloc_pages_node.

Fix this by simply ignoring __GFP_OTHER_NODE (it can be removed in a
follow up patch) and treat all allocations that were satisfied from the
preferred zone's node as NUMA_HITS because this is the same node we
requested the allocation from in most cases.  If this is not the local
node then we just account it as NUMA_OTHER rather than NUMA_LOCAL.

One downsize would be that an allocation request for a node which is
outside of the mempolicy nodemask would be reported as a hit which is a
bit weird but that was the case before b9f00e147f27 already.

Fixes: b9f00e147f27 ("mm, page_alloc: reduce branches in zone_statistics")
Link: http://lkml.kernel.org/r/20170102153057.9451-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Jia He <hejianet@gmail.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz> # with cbmc[1] superpowers
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2c6d5f64feca..cba2a64792e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2583,30 +2583,23 @@ int __isolate_free_page(struct page *page, unsigned int order)
  * Update NUMA hit/miss statistics
  *
  * Must be called with interrupts disabled.
- *
- * When __GFP_OTHER_NODE is set assume the node of the preferred
- * zone is the local node. This is useful for daemons who allocate
- * memory on behalf of other processes.
  */
 static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
 								gfp_t flags)
 {
 #ifdef CONFIG_NUMA
-	int local_nid = numa_node_id();
 	enum zone_stat_item local_stat = NUMA_LOCAL;
 
-	if (unlikely(flags & __GFP_OTHER_NODE)) {
+	if (z->node != numa_node_id())
 		local_stat = NUMA_OTHER;
-		local_nid = preferred_zone->node;
-	}
 
-	if (z->node == local_nid) {
+	if (z->node == preferred_zone->node)
 		__inc_zone_state(z, NUMA_HIT);
-		__inc_zone_state(z, local_stat);
-	} else {
+	else {
 		__inc_zone_state(z, NUMA_MISS);
 		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
 	}
+	__inc_zone_state(z, local_stat);
 #endif
 }
 

From 41b6167e8f746b475668f1da78599fc4284f18db Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:57:42 -0800
Subject: [PATCH 200/297] mm: get rid of __GFP_OTHER_NODE

The flag was introduced by commit 78afd5612deb ("mm: add
__GFP_OTHER_NODE flag") to allow proper accounting of remote node
allocations done by kernel daemons on behalf of a process - e.g.
khugepaged.

After "mm: fix remote numa hits statistics" we do not need and actually
use the flag so we can safely remove it because all allocations which
are satisfied from their "home" node are accounted properly.

[mhocko@suse.com: fix build]
Link: http://lkml.kernel.org/r/20170106122225.GK5556@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/20170102153057.9451-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h            | 13 +++----------
 include/trace/events/mmflags.h |  3 +--
 mm/huge_memory.c               |  3 +--
 mm/khugepaged.c                |  5 ++---
 mm/page_alloc.c                |  5 ++---
 tools/perf/builtin-kmem.c      |  1 -
 6 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4175dca4ac39..7806a8f80abc 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -38,9 +38,8 @@ struct vm_area_struct;
 #define ___GFP_ACCOUNT		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_DIRECT_RECLAIM	0x400000u
-#define ___GFP_OTHER_NODE	0x800000u
-#define ___GFP_WRITE		0x1000000u
-#define ___GFP_KSWAPD_RECLAIM	0x2000000u
+#define ___GFP_WRITE		0x800000u
+#define ___GFP_KSWAPD_RECLAIM	0x1000000u
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
@@ -172,11 +171,6 @@ struct vm_area_struct;
  * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
  *   distinguishing in the source between false positives and allocations that
  *   cannot be supported (e.g. page tables).
- *
- * __GFP_OTHER_NODE is for allocations that are on a remote node but that
- *   should not be accounted for as a remote allocation in vmstat. A
- *   typical user would be khugepaged collapsing a huge page on a remote
- *   node.
  */
 #define __GFP_COLD	((__force gfp_t)___GFP_COLD)
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
@@ -184,10 +178,9 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
-#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT 26
+#define __GFP_BITS_SHIFT 25
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /*
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 9e687ca9a307..15bf875d0e4a 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -47,8 +47,7 @@
 	{(unsigned long)__GFP_WRITE,		"__GFP_WRITE"},		\
 	{(unsigned long)__GFP_RECLAIM,		"__GFP_RECLAIM"},	\
 	{(unsigned long)__GFP_DIRECT_RECLAIM,	"__GFP_DIRECT_RECLAIM"},\
-	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"},\
-	{(unsigned long)__GFP_OTHER_NODE,	"__GFP_OTHER_NODE"}	\
+	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"}\
 
 #define show_gfp_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 10eedbf14421..72339a646fb1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -919,8 +919,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
 	}
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
-		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
-					       __GFP_OTHER_NODE, vma,
+		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
 					       vmf->address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
 			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b0924a68cc36..77ae3239c3de 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -943,7 +943,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
 	/* Only allocate from the target node */
-	gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_OTHER_NODE | __GFP_THISNODE;
+	gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
 
 	/*
 	 * Before allocating the hugepage, release the mmap_sem read lock.
@@ -1309,8 +1309,7 @@ static void collapse_shmem(struct mm_struct *mm,
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
 	/* Only allocate from the target node */
-	gfp = alloc_hugepage_khugepaged_gfpmask() |
-		__GFP_OTHER_NODE | __GFP_THISNODE;
+	gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
 
 	new_page = khugepaged_alloc_page(hpage, gfp, node);
 	if (!new_page) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cba2a64792e6..872caae544ef 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2584,8 +2584,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
  *
  * Must be called with interrupts disabled.
  */
-static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
-								gfp_t flags)
+static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 {
 #ifdef CONFIG_NUMA
 	enum zone_stat_item local_stat = NUMA_LOCAL;
@@ -2667,7 +2666,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 	}
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
-	zone_statistics(preferred_zone, zone, gfp_flags);
+	zone_statistics(preferred_zone, zone);
 	local_irq_restore(flags);
 
 	VM_BUG_ON_PAGE(bad_range(zone, page), page);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 35a02f8e5a4a..915869e00d86 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -655,7 +655,6 @@ static const struct {
 	{ "__GFP_RECLAIM",		"R" },
 	{ "__GFP_DIRECT_RECLAIM",	"DR" },
 	{ "__GFP_KSWAPD_RECLAIM",	"KR" },
-	{ "__GFP_OTHER_NODE",		"ON" },
 };
 
 static size_t max_gfp_len;

From da0510c47519fe0999cffe316e1d370e29f952be Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Tue, 10 Jan 2017 16:57:45 -0800
Subject: [PATCH 201/297] lib/Kconfig.debug: fix frv build failure

The build of frv allmodconfig was failing with the errors like:

  /tmp/cc0JSPc3.s: Assembler messages:
  /tmp/cc0JSPc3.s:1839: Error: symbol `.LSLT0' is already defined
  /tmp/cc0JSPc3.s:1842: Error: symbol `.LASLTP0' is already defined
  /tmp/cc0JSPc3.s:1969: Error: symbol `.LELTP0' is already defined
  /tmp/cc0JSPc3.s:1970: Error: symbol `.LELT0' is already defined

Commit 866ced950bcd ("kbuild: Support split debug info v4") introduced
splitting the debug info and keeping that in a separate file.  Somehow,
the frv-linux gcc did not like that and I am guessing that instead of
splitting it started copying.  The first report about this is at:

  https://lists.01.org/pipermail/kbuild-all/2015-July/010527.html.

I will try and see if this can work with frv and if still fails I will
open a bug report with gcc.  But meanwhile this is the easiest option to
solve build failure of frv.

Fixes: 866ced950bcd ("kbuild: Support split debug info v4")
Link: http://lkml.kernel.org/r/1482062348-5352-1-git-send-email-sudipm.mukherjee@gmail.com
Signed-off-by: Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b06848a104e6..eb9e9a7870fa 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -164,7 +164,7 @@ config DEBUG_INFO_REDUCED
 
 config DEBUG_INFO_SPLIT
 	bool "Produce split debuginfo in .dwo files"
-	depends on DEBUG_INFO
+	depends on DEBUG_INFO && !FRV
 	help
 	  Generate debug info into separate .dwo files. This significantly
 	  reduces the build directory size for builds with DEBUG_INFO,

From c626bc46edb0fec289adfc86b02e07d34127ef6c Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Tue, 10 Jan 2017 16:57:48 -0800
Subject: [PATCH 202/297] ipc/sem.c: fix incorrect sem_lock pairing

Based on the syzcaller test case from dvyukov:

  https://gist.githubusercontent.com/dvyukov/d0e5efefe4d7d6daed829f5c3ca26a40/raw/08d0a261fe3c987bed04fbf267e08ba04bd533ea/gistfile1.txt

The slow (i.e.: failure to acquire) syscall exit from semtimedop()
incorrectly assumed that the the same lock is acquired as it was at the
initial syscall entry.

This is wrong:
 - thread A: single semop semop(), sleeps
 - thread B: multi semop semop(), sleeps
 - thread A: woken up by signal/timeout

With this sequence, the initial sem_lock() call locks the per-semaphore
spinlock, and it is unlocked with sem_unlock().  The call at the syscall
return locks the global spinlock.  Because locknum is not updated, the
following sem_unlock() call unlocks the per-semaphore spinlock, which is
actually not locked.

The fix is trivial: Use the return value from sem_lock.

Fixes: 370b262c896e ("ipc/sem: avoid idr tree lookup for interrupted semop")
Link: http://lkml.kernel.org/r/1482215645-22328-1-git-send-email-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Johanna Abrahamsson <johanna@mjao.org>
Tested-by: Johanna Abrahamsson <johanna@mjao.org>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index e08b94851922..3ec5742b5640 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1977,7 +1977,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		}
 
 		rcu_read_lock();
-		sem_lock(sma, sops, nsops);
+		locknum = sem_lock(sma, sops, nsops);
 
 		if (!ipc_valid_object(&sma->sem_perm))
 			goto out_unlock_free;

From 20f664aabeb88d582b623a625f83b0454fa34f07 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 10 Jan 2017 16:57:51 -0800
Subject: [PATCH 203/297] mm: pmd dirty emulation in page fault handler

Andreas reported [1] made a test in jemalloc hang in THP mode in arm64:

  http://lkml.kernel.org/r/mvmmvfy37g1.fsf@hawking.suse.de

The problem is currently page fault handler doesn't supports dirty bit
emulation of pmd for non-HW dirty-bit architecture so that application
stucks until VM marked the pmd dirty.

How the emulation work depends on the architecture.  In case of arm64,
when it set up pte firstly, it sets pte PTE_RDONLY to get a chance to
mark the pte dirty via triggering page fault when store access happens.
Once the page fault occurs, VM marks the pmd dirty and arch code for
setting pmd will clear PTE_RDONLY for application to proceed.

IOW, if VM doesn't mark the pmd dirty, application hangs forever by
repeated fault(i.e., store op but the pmd is PTE_RDONLY).

This patch enables pmd dirty-bit emulation for those architectures.

[1] b8d3c4c3009d, mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called

Fixes: b8d3c4c3009d ("mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called")
Link: http://lkml.kernel.org/r/1482506098-6149-1-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: Andreas Schwab <schwab@suse.de>
Tested-by: Andreas Schwab <schwab@suse.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Jason Evans <je@fb.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: <stable@vger.kernel.org> [4.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 72339a646fb1..9a6bd6c8d55a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -883,15 +883,17 @@ void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	pmd_t entry;
 	unsigned long haddr;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
 
 	vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
 		goto unlock;
 
 	entry = pmd_mkyoung(orig_pmd);
+	if (write)
+		entry = pmd_mkdirty(entry);
 	haddr = vmf->address & HPAGE_PMD_MASK;
-	if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry,
-				vmf->flags & FAULT_FLAG_WRITE))
+	if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry, write))
 		update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
 
 unlock:

From 2d39b3cd34e6d323720d4c61bd714f5ae202c022 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie.iles@oracle.com>
Date: Tue, 10 Jan 2017 16:57:54 -0800
Subject: [PATCH 204/297] signal: protect SIGNAL_UNKILLABLE from unintentional
 clearing.

Since commit 00cd5c37afd5 ("ptrace: permit ptracing of /sbin/init") we
can now trace init processes.  init is initially protected with
SIGNAL_UNKILLABLE which will prevent fatal signals such as SIGSTOP, but
there are a number of paths during tracing where SIGNAL_UNKILLABLE can
be implicitly cleared.

This can result in init becoming stoppable/killable after tracing.  For
example, running:

  while true; do kill -STOP 1; done &
  strace -p 1

and then stopping strace and the kill loop will result in init being
left in state TASK_STOPPED.  Sending SIGCONT to init will resume it, but
init will now respond to future SIGSTOP signals rather than ignoring
them.

Make sure that when setting SIGNAL_STOP_CONTINUED/SIGNAL_STOP_STOPPED
that we don't clear SIGNAL_UNKILLABLE.

Link: http://lkml.kernel.org/r/20170104122017.25047-1-jamie.iles@oracle.com
Signed-off-by: Jamie Iles <jamie.iles@oracle.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 10 ++++++++++
 kernel/signal.c       |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d1905245c7a..ad3ec9ec61f7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -854,6 +854,16 @@ struct signal_struct {
 
 #define SIGNAL_UNKILLABLE	0x00000040 /* for init: ignore fatal signals */
 
+#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
+			  SIGNAL_STOP_CONTINUED)
+
+static inline void signal_set_stop_flags(struct signal_struct *sig,
+					 unsigned int flags)
+{
+	WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
+	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
+}
+
 /* If true, all threads except ->group_exit_task have pending SIGKILL */
 static inline int signal_group_exit(const struct signal_struct *sig)
 {
diff --git a/kernel/signal.c b/kernel/signal.c
index ff046b73ff2d..3603d93a1968 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task)
 	 * fresh group stop.  Read comment in do_signal_stop() for details.
 	 */
 	if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
-		sig->flags = SIGNAL_STOP_STOPPED;
+		signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED);
 		return true;
 	}
 	return false;
@@ -843,7 +843,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
 			 * will take ->siglock, notice SIGNAL_CLD_MASK, and
 			 * notify its parent. See get_signal_to_deliver().
 			 */
-			signal->flags = why | SIGNAL_STOP_CONTINUED;
+			signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED);
 			signal->group_stop_count = 0;
 			signal->group_exit_code = 0;
 		}

From 9ebf73b275f06b114586af27cda3fd72e149d5ba Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 10 Jan 2017 16:57:57 -0800
Subject: [PATCH 205/297] mailmap: add codeaurora.org names for nameless email
 commits

Some codeaurora.org emails have crept in but the names don't exist for
them.  Add the names for the emails so git can match everyone up.

Link: http://lkml.kernel.org/r/20170104194611.25933-1-sboyd@codeaurora.org
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: Sarangdhar Joshi <spjoshi@codeaurora.org>
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Subhash Jadavani <subhashj@codeaurora.org>
Cc: Thomas Pedersen <twp@codeaurora.org>
Cc: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.mailmap b/.mailmap
index 02d261407683..67dc22ffc9a8 100644
--- a/.mailmap
+++ b/.mailmap
@@ -137,6 +137,7 @@ Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
 Rudolf Marek <R.Marek@sh.cvut.cz>
 Rui Saraiva <rmps@joel.ist.utl.pt>
 Sachin P Sant <ssant@in.ibm.com>
+Sarangdhar Joshi <spjoshi@codeaurora.org>
 Sam Ravnborg <sam@mars.ravnborg.org>
 Santosh Shilimkar <ssantosh@kernel.org>
 Santosh Shilimkar <santosh.shilimkar@oracle.org>
@@ -150,10 +151,13 @@ Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
 Simon Kelley <simon@thekelleys.org.uk>
 Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
 Stephen Hemminger <shemminger@osdl.org>
+Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Subhash Jadavani <subhashj@codeaurora.org>
 Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
 Sumit Semwal <sumit.semwal@ti.com>
 Tejun Heo <htejun@gmail.com>
 Thomas Graf <tgraf@suug.ch>
+Thomas Pedersen <twp@codeaurora.org>
 Tony Luck <tony.luck@intel.com>
 Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
 Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>

From f073bdc51771f5a5c7a8d1191bfc3ae371d44de7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 10 Jan 2017 16:58:00 -0800
Subject: [PATCH 206/297] mm: don't dereference struct page fields of invalid
 pages

The VM_BUG_ON() check in move_freepages() checks whether the node id of
a page matches the node id of its zone.  However, it does this before
having checked whether the struct page pointer refers to a valid struct
page to begin with.  This is guaranteed in most cases, but may not be
the case if CONFIG_HOLES_IN_ZONE=y.

So reorder the VM_BUG_ON() with the pfn_valid_within() check.

Link: http://lkml.kernel.org/r/1481706707-6211-2-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Yisheng Xie <xieyisheng1@huawei.com>
Cc: Robert Richter <rrichter@cavium.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 872caae544ef..74afdb4177cb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1864,14 +1864,14 @@ int move_freepages(struct zone *zone,
 #endif
 
 	for (page = start_page; page <= end_page;) {
-		/* Make sure we are not inadvertently changing nodes */
-		VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
-
 		if (!pfn_valid_within(page_to_pfn(page))) {
 			page++;
 			continue;
 		}
 
+		/* Make sure we are not inadvertently changing nodes */
+		VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
+
 		if (!PageBuddy(page)) {
 			page++;
 			continue;

From b4536f0c829c8586544c94735c343f9b5070bd01 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:58:04 -0800
Subject: [PATCH 207/297] mm, memcg: fix the active list aging for lowmem
 requests when memcg is enabled

Nils Holland and Klaus Ethgen have reported unexpected OOM killer
invocations with 32b kernel starting with 4.8 kernels

	kworker/u4:5 invoked oom-killer: gfp_mask=0x2400840(GFP_NOFS|__GFP_NOFAIL), nodemask=0, order=0, oom_score_adj=0
	kworker/u4:5 cpuset=/ mems_allowed=0
	CPU: 1 PID: 2603 Comm: kworker/u4:5 Not tainted 4.9.0-gentoo #2
	[...]
	Mem-Info:
	active_anon:58685 inactive_anon:90 isolated_anon:0
	 active_file:274324 inactive_file:281962 isolated_file:0
	 unevictable:0 dirty:649 writeback:0 unstable:0
	 slab_reclaimable:40662 slab_unreclaimable:17754
	 mapped:7382 shmem:202 pagetables:351 bounce:0
	 free:206736 free_pcp:332 free_cma:0
	Node 0 active_anon:234740kB inactive_anon:360kB active_file:1097296kB inactive_file:1127848kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:29528kB dirty:2596kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 184320kB anon_thp: 808kB writeback_tmp:0kB unstable:0kB pages_scanned:0 all_unreclaimable? no
	DMA free:3952kB min:788kB low:984kB high:1180kB active_anon:0kB inactive_anon:0kB active_file:7316kB inactive_file:0kB unevictable:0kB writepending:96kB present:15992kB managed:15916kB mlocked:0kB slab_reclaimable:3200kB slab_unreclaimable:1408kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
	lowmem_reserve[]: 0 813 3474 3474
	Normal free:41332kB min:41368kB low:51708kB high:62048kB active_anon:0kB inactive_anon:0kB active_file:532748kB inactive_file:44kB unevictable:0kB writepending:24kB present:897016kB managed:836248kB mlocked:0kB slab_reclaimable:159448kB slab_unreclaimable:69608kB kernel_stack:1112kB pagetables:1404kB bounce:0kB free_pcp:528kB local_pcp:340kB free_cma:0kB
	lowmem_reserve[]: 0 0 21292 21292
	HighMem free:781660kB min:512kB low:34356kB high:68200kB active_anon:234740kB inactive_anon:360kB active_file:557232kB inactive_file:1127804kB unevictable:0kB writepending:2592kB present:2725384kB managed:2725384kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:800kB local_pcp:608kB free_cma:0kB

the oom killer is clearly pre-mature because there there is still a lot
of page cache in the zone Normal which should satisfy this lowmem
request.  Further debugging has shown that the reclaim cannot make any
forward progress because the page cache is hidden in the active list
which doesn't get rotated because inactive_list_is_low is not memcg
aware.

The code simply subtracts per-zone highmem counters from the respective
memcg's lru sizes which doesn't make any sense.  We can simply end up
always seeing the resulting active and inactive counts 0 and return
false.  This issue is not limited to 32b kernels but in practice the
effect on systems without CONFIG_HIGHMEM would be much harder to notice
because we do not invoke the OOM killer for allocations requests
targeting < ZONE_NORMAL.

Fix the issue by tracking per zone lru page counts in mem_cgroup_per_node
and subtract per-memcg highmem counts when memcg is enabled.  Introduce
helper lruvec_zone_lru_size which redirects to either zone counters or
mem_cgroup_get_zone_lru_size when appropriate.

We are losing empty LRU but non-zero lru size detection introduced by
ca707239e8a7 ("mm: update_lru_size warn and reset bad lru_size") because
of the inherent zone vs. node discrepancy.

Fixes: f8d1a31163fc ("mm: consider whether to decivate based on eligible zones inactive ratio")
Link: http://lkml.kernel.org/r/20170104100825.3729-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Nils Holland <nholland@tisys.org>
Tested-by: Nils Holland <nholland@tisys.org>
Reported-by: Klaus Ethgen <Klaus@Ethgen.de>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 26 +++++++++++++++++++++++---
 include/linux/mm_inline.h  |  2 +-
 mm/memcontrol.c            | 18 ++++++++----------
 mm/vmscan.c                | 27 +++++++++++++++++----------
 4 files changed, 49 insertions(+), 24 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 61d20c17f3b7..254698856b8f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -120,7 +120,7 @@ struct mem_cgroup_reclaim_iter {
  */
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
-	unsigned long		lru_size[NR_LRU_LISTS];
+	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
 
@@ -432,7 +432,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
-		int nr_pages);
+		int zid, int nr_pages);
 
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 					   int nid, unsigned int lru_mask);
@@ -441,9 +441,23 @@ static inline
 unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	struct mem_cgroup_per_node *mz;
+	unsigned long nr_pages = 0;
+	int zid;
 
 	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	return mz->lru_size[lru];
+	for (zid = 0; zid < MAX_NR_ZONES; zid++)
+		nr_pages += mz->lru_zone_size[zid][lru];
+	return nr_pages;
+}
+
+static inline
+unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
+		enum lru_list lru, int zone_idx)
+{
+	struct mem_cgroup_per_node *mz;
+
+	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	return mz->lru_zone_size[zone_idx][lru];
 }
 
 void mem_cgroup_handle_over_high(void);
@@ -671,6 +685,12 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	return 0;
 }
+static inline
+unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
+		enum lru_list lru, int zone_idx)
+{
+	return 0;
+}
 
 static inline unsigned long
 mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 71613e8a720f..41d376e7116d 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -39,7 +39,7 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
 {
 	__update_lru_size(lruvec, lru, zid, nr_pages);
 #ifdef CONFIG_MEMCG
-	mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
+	mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
 #endif
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4048897e7b01..a63a8f832664 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -625,8 +625,8 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 					   int nid, unsigned int lru_mask)
 {
+	struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
 	unsigned long nr = 0;
-	struct mem_cgroup_per_node *mz;
 	enum lru_list lru;
 
 	VM_BUG_ON((unsigned)nid >= nr_node_ids);
@@ -634,8 +634,7 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 	for_each_lru(lru) {
 		if (!(BIT(lru) & lru_mask))
 			continue;
-		mz = mem_cgroup_nodeinfo(memcg, nid);
-		nr += mz->lru_size[lru];
+		nr += mem_cgroup_get_lru_size(lruvec, lru);
 	}
 	return nr;
 }
@@ -1002,6 +1001,7 @@ out:
  * mem_cgroup_update_lru_size - account for adding or removing an lru page
  * @lruvec: mem_cgroup per zone lru vector
  * @lru: index of lru list the page is sitting on
+ * @zid: zone id of the accounted pages
  * @nr_pages: positive when adding or negative when removing
  *
  * This function must be called under lru_lock, just before a page is added
@@ -1009,27 +1009,25 @@ out:
  * so as to allow it to check that lru_size 0 is consistent with list_empty).
  */
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
-				int nr_pages)
+				int zid, int nr_pages)
 {
 	struct mem_cgroup_per_node *mz;
 	unsigned long *lru_size;
 	long size;
-	bool empty;
 
 	if (mem_cgroup_disabled())
 		return;
 
 	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	lru_size = mz->lru_size + lru;
-	empty = list_empty(lruvec->lists + lru);
+	lru_size = &mz->lru_zone_size[zid][lru];
 
 	if (nr_pages < 0)
 		*lru_size += nr_pages;
 
 	size = *lru_size;
-	if (WARN_ONCE(size < 0 || empty != !size,
-		"%s(%p, %d, %d): lru_size %ld but %sempty\n",
-		__func__, lruvec, lru, nr_pages, size, empty ? "" : "not ")) {
+	if (WARN_ONCE(size < 0,
+		"%s(%p, %d, %d): lru_size %ld\n",
+		__func__, lruvec, lru, nr_pages, size)) {
 		VM_BUG_ON(1);
 		*lru_size = 0;
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6aa5b01d3e75..532a2a750952 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -242,6 +242,16 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
 	return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
 }
 
+unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
+				   int zone_idx)
+{
+	if (!mem_cgroup_disabled())
+		return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
+
+	return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
+			       NR_ZONE_LRU_BASE + lru);
+}
+
 /*
  * Add a shrinker callback to be called from the vm.
  */
@@ -1382,8 +1392,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
  * be complete before mem_cgroup_update_lru_size due to a santity check.
  */
 static __always_inline void update_lru_sizes(struct lruvec *lruvec,
-			enum lru_list lru, unsigned long *nr_zone_taken,
-			unsigned long nr_taken)
+			enum lru_list lru, unsigned long *nr_zone_taken)
 {
 	int zid;
 
@@ -1392,11 +1401,11 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 			continue;
 
 		__update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
+#ifdef CONFIG_MEMCG
+		mem_cgroup_update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
+#endif
 	}
 
-#ifdef CONFIG_MEMCG
-	mem_cgroup_update_lru_size(lruvec, lru, -nr_taken);
-#endif
 }
 
 /*
@@ -1501,7 +1510,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	*nr_scanned = scan;
 	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
 				    nr_taken, mode, is_file_lru(lru));
-	update_lru_sizes(lruvec, lru, nr_zone_taken, nr_taken);
+	update_lru_sizes(lruvec, lru, nr_zone_taken);
 	return nr_taken;
 }
 
@@ -2047,10 +2056,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 		if (!managed_zone(zone))
 			continue;
 
-		inactive_zone = zone_page_state(zone,
-				NR_ZONE_LRU_BASE + (file * LRU_FILE));
-		active_zone = zone_page_state(zone,
-				NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE);
+		inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
+		active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
 
 		inactive -= min(inactive, inactive_zone);
 		active -= min(active, active_zone);

From 8c2dd3e4a4bae78093c4a5cee6494877651be3c9 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 10 Jan 2017 16:58:06 -0800
Subject: [PATCH 208/297] mm: rename __alloc_page_frag to page_frag_alloc and
 __free_page_frag to page_frag_free

Patch series "Page fragment updates", v4.

This patch series takes care of a few cleanups for the page fragments
API.

First we do some renames so that things are much more consistent.  First
we move the page_frag_ portion of the name to the front of the functions
names.  Secondly we split out the cache specific functions from the
other page fragment functions by adding the word "cache" to the name.

Finally I added a bit of documentation that will hopefully help to
explain some of this.  I plan to revisit this later as we get things
more ironed out in the near future with the changes planned for the DMA
setup to support eXpress Data Path.

This patch (of 3):

This patch renames the page frag functions to be more consistent with
other APIs.  Specifically we place the name page_frag first in the name
and then have either an alloc or free call name that we append as the
suffix.  This makes it a bit clearer in terms of naming.

In addition we drop the leading double underscores since we are
technically no longer a backing interface and instead the front end that
is called from the networking APIs.

Link: http://lkml.kernel.org/r/20170104023854.13451.67390.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h    |  6 +++---
 include/linux/skbuff.h |  2 +-
 mm/page_alloc.c        | 10 +++++-----
 net/core/skbuff.c      |  8 ++++----
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7806a8f80abc..ed77a86fbbb0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -501,9 +501,9 @@ extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 struct page_frag_cache;
 extern void __page_frag_drain(struct page *page, unsigned int order,
 			      unsigned int count);
-extern void *__alloc_page_frag(struct page_frag_cache *nc,
-			       unsigned int fragsz, gfp_t gfp_mask);
-extern void __free_page_frag(void *addr);
+extern void *page_frag_alloc(struct page_frag_cache *nc,
+			     unsigned int fragsz, gfp_t gfp_mask);
+extern void page_frag_free(void *addr);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b53c0cfd417e..a410715bbef8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2480,7 +2480,7 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 
 static inline void skb_free_frag(void *addr)
 {
-	__free_page_frag(addr);
+	page_frag_free(addr);
 }
 
 void *napi_alloc_frag(unsigned int fragsz);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74afdb4177cb..097893ffe194 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3931,8 +3931,8 @@ void __page_frag_drain(struct page *page, unsigned int order,
 }
 EXPORT_SYMBOL(__page_frag_drain);
 
-void *__alloc_page_frag(struct page_frag_cache *nc,
-			unsigned int fragsz, gfp_t gfp_mask)
+void *page_frag_alloc(struct page_frag_cache *nc,
+		      unsigned int fragsz, gfp_t gfp_mask)
 {
 	unsigned int size = PAGE_SIZE;
 	struct page *page;
@@ -3983,19 +3983,19 @@ refill:
 
 	return nc->va + offset;
 }
-EXPORT_SYMBOL(__alloc_page_frag);
+EXPORT_SYMBOL(page_frag_alloc);
 
 /*
  * Frees a page fragment allocated out of either a compound or order 0 page.
  */
-void __free_page_frag(void *addr)
+void page_frag_free(void *addr)
 {
 	struct page *page = virt_to_head_page(addr);
 
 	if (unlikely(put_page_testzero(page)))
 		__free_pages_ok(page, compound_order(page));
 }
-EXPORT_SYMBOL(__free_page_frag);
+EXPORT_SYMBOL(page_frag_free);
 
 static void *make_alloc_exact(unsigned long addr, unsigned int order,
 		size_t size)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5a03730fbc1a..734c71468b01 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -369,7 +369,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 
 	local_irq_save(flags);
 	nc = this_cpu_ptr(&netdev_alloc_cache);
-	data = __alloc_page_frag(nc, fragsz, gfp_mask);
+	data = page_frag_alloc(nc, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -391,7 +391,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
+	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -441,7 +441,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	local_irq_save(flags);
 
 	nc = this_cpu_ptr(&netdev_alloc_cache);
-	data = __alloc_page_frag(nc, len, gfp_mask);
+	data = page_frag_alloc(nc, len, gfp_mask);
 	pfmemalloc = nc->pfmemalloc;
 
 	local_irq_restore(flags);
@@ -505,7 +505,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	data = __alloc_page_frag(&nc->page, len, gfp_mask);
+	data = page_frag_alloc(&nc->page, len, gfp_mask);
 	if (unlikely(!data))
 		return NULL;
 

From 2976db8018532b624c4123ae662fbc0814877abf Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 10 Jan 2017 16:58:09 -0800
Subject: [PATCH 209/297] mm: rename __page_frag functions to
 __page_frag_cache, drop order from drain

This patch does two things.

First it goes through and renames the __page_frag prefixed functions to
__page_frag_cache so that we can be clear that we are draining or
refilling the cache, not the frags themselves.

Second we drop the order parameter from __page_frag_cache_drain since we
don't actually need to pass it since all fragments are either order 0 or
must be a compound page.

Link: http://lkml.kernel.org/r/20170104023954.13451.5678.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/net/ethernet/intel/igb/igb_main.c |  6 +++---
 include/linux/gfp.h                       |  3 +--
 mm/page_alloc.c                           | 13 +++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index a761001308dc..1515abaa5ac9 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3962,8 +3962,8 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
 				     PAGE_SIZE,
 				     DMA_FROM_DEVICE,
 				     DMA_ATTR_SKIP_CPU_SYNC);
-		__page_frag_drain(buffer_info->page, 0,
-				  buffer_info->pagecnt_bias);
+		__page_frag_cache_drain(buffer_info->page,
+					buffer_info->pagecnt_bias);
 
 		buffer_info->page = NULL;
 	}
@@ -6991,7 +6991,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
 				     PAGE_SIZE, DMA_FROM_DEVICE,
 				     DMA_ATTR_SKIP_CPU_SYNC);
-		__page_frag_drain(page, 0, rx_buffer->pagecnt_bias);
+		__page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
 	}
 
 	/* clear contents of rx_buffer */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ed77a86fbbb0..0fe0b6295ab5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -499,8 +499,7 @@ extern void free_hot_cold_page(struct page *page, bool cold);
 extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
 struct page_frag_cache;
-extern void __page_frag_drain(struct page *page, unsigned int order,
-			      unsigned int count);
+extern void __page_frag_cache_drain(struct page *page, unsigned int count);
 extern void *page_frag_alloc(struct page_frag_cache *nc,
 			     unsigned int fragsz, gfp_t gfp_mask);
 extern void page_frag_free(void *addr);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 097893ffe194..d604d2596b7b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3896,8 +3896,8 @@ EXPORT_SYMBOL(free_pages);
  * drivers to provide a backing region of memory for use as either an
  * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
  */
-static struct page *__page_frag_refill(struct page_frag_cache *nc,
-				       gfp_t gfp_mask)
+static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
+					     gfp_t gfp_mask)
 {
 	struct page *page = NULL;
 	gfp_t gfp = gfp_mask;
@@ -3917,19 +3917,20 @@ static struct page *__page_frag_refill(struct page_frag_cache *nc,
 	return page;
 }
 
-void __page_frag_drain(struct page *page, unsigned int order,
-		       unsigned int count)
+void __page_frag_cache_drain(struct page *page, unsigned int count)
 {
 	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
 
 	if (page_ref_sub_and_test(page, count)) {
+		unsigned int order = compound_order(page);
+
 		if (order == 0)
 			free_hot_cold_page(page, false);
 		else
 			__free_pages_ok(page, order);
 	}
 }
-EXPORT_SYMBOL(__page_frag_drain);
+EXPORT_SYMBOL(__page_frag_cache_drain);
 
 void *page_frag_alloc(struct page_frag_cache *nc,
 		      unsigned int fragsz, gfp_t gfp_mask)
@@ -3940,7 +3941,7 @@ void *page_frag_alloc(struct page_frag_cache *nc,
 
 	if (unlikely(!nc->va)) {
 refill:
-		page = __page_frag_refill(nc, gfp_mask);
+		page = __page_frag_cache_refill(nc, gfp_mask);
 		if (!page)
 			return NULL;
 

From 4d09d0f45dd5d78b3a301c238272211d1ea7d9e6 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 10 Jan 2017 16:58:12 -0800
Subject: [PATCH 210/297] mm: add documentation for page fragment APIs

This is a first pass at trying to add documentation for the page_frag
APIs.  They may still change over time but for now I thought I would try
to get these documented so that as more network drivers and stack calls
make use of them we have one central spot to document how they are meant
to be used.

Link: http://lkml.kernel.org/r/20170104024157.13451.6758.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/page_frags | 42 +++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 Documentation/vm/page_frags

diff --git a/Documentation/vm/page_frags b/Documentation/vm/page_frags
new file mode 100644
index 000000000000..a6714565dbf9
--- /dev/null
+++ b/Documentation/vm/page_frags
@@ -0,0 +1,42 @@
+Page fragments
+--------------
+
+A page fragment is an arbitrary-length arbitrary-offset area of memory
+which resides within a 0 or higher order compound page.  Multiple
+fragments within that page are individually refcounted, in the page's
+reference counter.
+
+The page_frag functions, page_frag_alloc and page_frag_free, provide a
+simple allocation framework for page fragments.  This is used by the
+network stack and network device drivers to provide a backing region of
+memory for use as either an sk_buff->head, or to be used in the "frags"
+portion of skb_shared_info.
+
+In order to make use of the page fragment APIs a backing page fragment
+cache is needed.  This provides a central point for the fragment allocation
+and tracks allows multiple calls to make use of a cached page.  The
+advantage to doing this is that multiple calls to get_page can be avoided
+which can be expensive at allocation time.  However due to the nature of
+this caching it is required that any calls to the cache be protected by
+either a per-cpu limitation, or a per-cpu limitation and forcing interrupts
+to be disabled when executing the fragment allocation.
+
+The network stack uses two separate caches per CPU to handle fragment
+allocation.  The netdev_alloc_cache is used by callers making use of the
+__netdev_alloc_frag and __netdev_alloc_skb calls.  The napi_alloc_cache is
+used by callers of the __napi_alloc_frag and __napi_alloc_skb calls.  The
+main difference between these two calls is the context in which they may be
+called.  The "netdev" prefixed functions are usable in any context as these
+functions will disable interrupts, while the "napi" prefixed functions are
+only usable within the softirq context.
+
+Many network device drivers use a similar methodology for allocating page
+fragments, but the page fragments are cached at the ring or descriptor
+level.  In order to enable these cases it is necessary to provide a generic
+way of tearing down a page cache.  For this reason __page_frag_cache_drain
+was implemented.  It allows for freeing multiple references from a single
+page via a single call.  The advantage to doing this is that it allows for
+cleaning up the multiple references that were added to a page in order to
+avoid calling get_page per allocation.
+
+Alexander Duyck, Nov 29, 2016.

From f05714293a591038304ddae7cb0dd747bb3786cc Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 10 Jan 2017 16:58:15 -0800
Subject: [PATCH 211/297] mm: support anonymous stable page

During developemnt for zram-swap asynchronous writeback, I found strange
corruption of compressed page, resulting in:

  Modules linked in: zram(E)
  CPU: 3 PID: 1520 Comm: zramd-1 Tainted: G            E   4.8.0-mm1-00320-ge0d4894c9c38-dirty #3274
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
  task: ffff88007620b840 task.stack: ffff880078090000
  RIP: set_freeobj.part.43+0x1c/0x1f
  RSP: 0018:ffff880078093ca8  EFLAGS: 00010246
  RAX: 0000000000000018 RBX: ffff880076798d88 RCX: ffffffff81c408c8
  RDX: 0000000000000018 RSI: 0000000000000000 RDI: 0000000000000246
  RBP: ffff880078093cb0 R08: 0000000000000000 R09: 0000000000000000
  R10: ffff88005bc43030 R11: 0000000000001df3 R12: ffff880076798d88
  R13: 000000000005bc43 R14: ffff88007819d1b8 R15: 0000000000000001
  FS:  0000000000000000(0000) GS:ffff88007e380000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fc934048f20 CR3: 0000000077b01000 CR4: 00000000000406e0
  Call Trace:
    obj_malloc+0x22b/0x260
    zs_malloc+0x1e4/0x580
    zram_bvec_rw+0x4cd/0x830 [zram]
    page_requests_rw+0x9c/0x130 [zram]
    zram_thread+0xe6/0x173 [zram]
    kthread+0xca/0xe0
    ret_from_fork+0x25/0x30

With investigation, it reveals currently stable page doesn't support
anonymous page.  IOW, reuse_swap_page can reuse the page without waiting
writeback completion so it can overwrite page zram is compressing.

Unfortunately, zram has used per-cpu stream feature from v4.7.
It aims for increasing cache hit ratio of scratch buffer for
compressing. Downside of that approach is that zram should ask
memory space for compressed page in per-cpu context which requires
stricted gfp flag which could be failed. If so, it retries to
allocate memory space out of per-cpu context so it could get memory
this time and compress the data again, copies it to the memory space.

In this scenario, zram assumes the data should never be changed
but it is not true unless stable page supports. So, If the data is
changed under us, zram can make buffer overrun because second
compression size could be bigger than one we got in previous trial
and blindly, copy bigger size object to smaller buffer which is
buffer overrun. The overrun breaks zsmalloc free object chaining
so system goes crash like above.

I think below is same problem.
https://bugzilla.suse.com/show_bug.cgi?id=997574

Unfortunately, reuse_swap_page should be atomic so that we cannot wait on
writeback in there so the approach in this patch is simply return false if
we found it needs stable page.  Although it increases memory footprint
temporarily, it happens rarely and it should be reclaimed easily althoug
it happened.  Also, It would be better than waiting of IO completion,
which is critial path for application latency.

Fixes: da9556a2367c ("zram: user per-cpu compression streams")
Link: http://lkml.kernel.org/r/20161120233015.GA14113@bbox
Link: http://lkml.kernel.org/r/1482366980-3782-2-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Hyeoncheol Lee <cheol.lee@lge.com>
Cc: <yjay.kim@lge.com>
Cc: Sangseok Lee <sangseok.lee@lge.com>
Cc: <stable@vger.kernel.org> [4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  3 ++-
 mm/swapfile.c        | 20 +++++++++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 09f4be179ff3..7f47b7098b1b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -150,8 +150,9 @@ enum {
 	SWP_FILE	= (1 << 7),	/* set after swap_activate success */
 	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
 	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
+	SWP_STABLE_WRITES = (1 << 10),	/* no overwrite PG_writeback pages */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 10),	/* refcount in scan_swap_map */
+	SWP_SCANNING	= (1 << 11),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1c6e0321205d..4761701d1721 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -943,11 +943,25 @@ bool reuse_swap_page(struct page *page, int *total_mapcount)
 	count = page_trans_huge_mapcount(page, total_mapcount);
 	if (count <= 1 && PageSwapCache(page)) {
 		count += page_swapcount(page);
-		if (count == 1 && !PageWriteback(page)) {
+		if (count != 1)
+			goto out;
+		if (!PageWriteback(page)) {
 			delete_from_swap_cache(page);
 			SetPageDirty(page);
+		} else {
+			swp_entry_t entry;
+			struct swap_info_struct *p;
+
+			entry.val = page_private(page);
+			p = swap_info_get(entry);
+			if (p->flags & SWP_STABLE_WRITES) {
+				spin_unlock(&p->lock);
+				return false;
+			}
+			spin_unlock(&p->lock);
 		}
 	}
+out:
 	return count <= 1;
 }
 
@@ -2448,6 +2462,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		error = -ENOMEM;
 		goto bad_swap;
 	}
+
+	if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
+		p->flags |= SWP_STABLE_WRITES;
+
 	if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
 		int cpu;
 

From e7ccfc4ccb703e0f033bd4617580039898e912dd Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 10 Jan 2017 16:58:18 -0800
Subject: [PATCH 212/297] zram: revalidate disk under init_lock

Commit b4c5c60920e3 ("zram: avoid lockdep splat by revalidate_disk")
moved revalidate_disk call out of init_lock to avoid lockdep
false-positive splat.  However, commit 08eee69fcf6b ("zram: remove
init_lock in zram_make_request") removed init_lock in IO path so there
is no worry about lockdep splat.  So, let's restore it.

This patch is needed to set BDI_CAP_STABLE_WRITES atomically in next
patch.

Fixes: da9556a2367c ("zram: user per-cpu compression streams")
Link: http://lkml.kernel.org/r/1482366980-3782-3-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Hyeoncheol Lee <cheol.lee@lge.com>
Cc: <yjay.kim@lge.com>
Cc: Sangseok Lee <sangseok.lee@lge.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: <stable@vger.kernel.org> [4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 15f58ab44d0b..195376b4472b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1095,14 +1095,8 @@ static ssize_t disksize_store(struct device *dev,
 	zram->comp = comp;
 	zram->disksize = disksize;
 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
-	up_write(&zram->init_lock);
-
-	/*
-	 * Revalidate disk out of the init_lock to avoid lockdep splat.
-	 * It's okay because disk's capacity is protected by init_lock
-	 * so that revalidate_disk always sees up-to-date capacity.
-	 */
 	revalidate_disk(zram->disk);
+	up_write(&zram->init_lock);
 
 	return len;
 

From b09ab054b69b07077bd3292f67e777861ac796e5 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 10 Jan 2017 16:58:21 -0800
Subject: [PATCH 213/297] zram: support BDI_CAP_STABLE_WRITES

zram has used per-cpu stream feature from v4.7.  It aims for increasing
cache hit ratio of scratch buffer for compressing.  Downside of that
approach is that zram should ask memory space for compressed page in
per-cpu context which requires stricted gfp flag which could be failed.
If so, it retries to allocate memory space out of per-cpu context so it
could get memory this time and compress the data again, copies it to the
memory space.

In this scenario, zram assumes the data should never be changed but it is
not true without stable page support.  So, If the data is changed under
us, zram can make buffer overrun so that zsmalloc free object chain is
broken so system goes crash like below

   https://bugzilla.suse.com/show_bug.cgi?id=997574

This patch adds BDI_CAP_STABLE_WRITES to zram for declaring "I am block
device needing *stable write*".

Fixes: da9556a2367c ("zram: user per-cpu compression streams")
Link: http://lkml.kernel.org/r/1482366980-3782-4-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Hyeoncheol Lee <cheol.lee@lge.com>
Cc: <yjay.kim@lge.com>
Cc: Sangseok Lee <sangseok.lee@lge.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: <stable@vger.kernel.org> [4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 195376b4472b..e5ab7d9e8c45 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -25,6 +25,7 @@
 #include <linux/genhd.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
+#include <linux/backing-dev.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/err.h>
@@ -112,6 +113,14 @@ static inline bool is_partial_io(struct bio_vec *bvec)
 	return bvec->bv_len != PAGE_SIZE;
 }
 
+static void zram_revalidate_disk(struct zram *zram)
+{
+	revalidate_disk(zram->disk);
+	/* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */
+	zram->disk->queue->backing_dev_info.capabilities |=
+		BDI_CAP_STABLE_WRITES;
+}
+
 /*
  * Check if request is within bounds and aligned on zram logical blocks.
  */
@@ -1095,7 +1104,7 @@ static ssize_t disksize_store(struct device *dev,
 	zram->comp = comp;
 	zram->disksize = disksize;
 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
-	revalidate_disk(zram->disk);
+	zram_revalidate_disk(zram);
 	up_write(&zram->init_lock);
 
 	return len;
@@ -1143,7 +1152,7 @@ static ssize_t reset_store(struct device *dev,
 	/* Make sure all the pending I/O are finished */
 	fsync_bdev(bdev);
 	zram_reset_device(zram);
-	revalidate_disk(zram->disk);
+	zram_revalidate_disk(zram);
 	bdput(bdev);
 
 	mutex_lock(&bdev->bd_mutex);

From c4e490cf148e85ead0d1b1c2caaba833f1d5b29f Mon Sep 17 00:00:00 2001
From: John Sperbeck <jsperbeck@google.com>
Date: Tue, 10 Jan 2017 16:58:24 -0800
Subject: [PATCH 214/297] mm/slab.c: fix SLAB freelist randomization duplicate
 entries

This patch fixes a bug in the freelist randomization code.  When a high
random number is used, the freelist will contain duplicate entries.  It
will result in different allocations sharing the same chunk.

It will result in odd behaviours and crashes.  It should be uncommon but
it depends on the machines.  We saw it happening more often on some
machines (every few hours of running tests).

Fixes: c7ce4f60ac19 ("mm: SLAB freelist randomization")
Link: http://lkml.kernel.org/r/20170103181908.143178-1-thgarnie@google.com
Signed-off-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Thomas Garnier <thgarnie@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 29bc6c0dedd0..4f2ec6bb46eb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2457,7 +2457,6 @@ union freelist_init_state {
 		unsigned int pos;
 		unsigned int *list;
 		unsigned int count;
-		unsigned int rand;
 	};
 	struct rnd_state rnd_state;
 };
@@ -2483,8 +2482,7 @@ static bool freelist_state_initialize(union freelist_init_state *state,
 	} else {
 		state->list = cachep->random_seq;
 		state->count = count;
-		state->pos = 0;
-		state->rand = rand;
+		state->pos = rand % count;
 		ret = true;
 	}
 	return ret;
@@ -2493,7 +2491,9 @@ static bool freelist_state_initialize(union freelist_init_state *state,
 /* Get the next entry on the list and randomize it using a random shift */
 static freelist_idx_t next_random_slot(union freelist_init_state *state)
 {
-	return (state->list[state->pos++] + state->rand) % state->count;
+	if (state->pos >= state->count)
+		state->pos = 0;
+	return state->list[state->pos++];
 }
 
 /* Swap two freelist entries */

From e5bbc8a6c992901058bc09e2ce01d16c111ff047 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Tue, 10 Jan 2017 16:58:27 -0800
Subject: [PATCH 215/297] mm/hugetlb.c: fix reservation race when freeing
 surplus pages

return_unused_surplus_pages() decrements the global reservation count,
and frees any unused surplus pages that were backing the reservation.

Commit 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in
return_unused_surplus_pages()") added a call to cond_resched_lock in the
loop freeing the pages.

As a result, the hugetlb_lock could be dropped, and someone else could
use the pages that will be freed in subsequent iterations of the loop.
This could result in inconsistent global hugetlb page state, application
api failures (such as mmap) failures or application crashes.

When dropping the lock in return_unused_surplus_pages, make sure that
the global reservation count (resv_huge_pages) remains sufficiently
large to prevent someone else from claiming pages about to be freed.

Analyzed by Paul Cassella.

Fixes: 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in return_unused_surplus_pages()")
Link: http://lkml.kernel.org/r/1483991767-6879-1-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: Paul Cassella <cassella@cray.com>
Suggested-by: Michal Hocko <mhocko@kernel.org>
Cc: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>	[3.15+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3edb759c5c7d..c7025c132670 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1773,23 +1773,32 @@ free:
 }
 
 /*
- * When releasing a hugetlb pool reservation, any surplus pages that were
- * allocated to satisfy the reservation must be explicitly freed if they were
- * never used.
- * Called with hugetlb_lock held.
+ * This routine has two main purposes:
+ * 1) Decrement the reservation count (resv_huge_pages) by the value passed
+ *    in unused_resv_pages.  This corresponds to the prior adjustments made
+ *    to the associated reservation map.
+ * 2) Free any unused surplus pages that may have been allocated to satisfy
+ *    the reservation.  As many as unused_resv_pages may be freed.
+ *
+ * Called with hugetlb_lock held.  However, the lock could be dropped (and
+ * reacquired) during calls to cond_resched_lock.  Whenever dropping the lock,
+ * we must make sure nobody else can claim pages we are in the process of
+ * freeing.  Do this by ensuring resv_huge_page always is greater than the
+ * number of huge pages we plan to free when dropping the lock.
  */
 static void return_unused_surplus_pages(struct hstate *h,
 					unsigned long unused_resv_pages)
 {
 	unsigned long nr_pages;
 
-	/* Uncommit the reservation */
-	h->resv_huge_pages -= unused_resv_pages;
-
 	/* Cannot return gigantic pages currently */
 	if (hstate_is_gigantic(h))
-		return;
+		goto out;
 
+	/*
+	 * Part (or even all) of the reservation could have been backed
+	 * by pre-allocated pages. Only free surplus pages.
+	 */
 	nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
 
 	/*
@@ -1799,12 +1808,22 @@ static void return_unused_surplus_pages(struct hstate *h,
 	 * when the nodes with surplus pages have no free pages.
 	 * free_pool_huge_page() will balance the the freed pages across the
 	 * on-line nodes with memory and will handle the hstate accounting.
+	 *
+	 * Note that we decrement resv_huge_pages as we free the pages.  If
+	 * we drop the lock, resv_huge_pages will still be sufficiently large
+	 * to cover subsequent pages we may free.
 	 */
 	while (nr_pages--) {
+		h->resv_huge_pages--;
+		unused_resv_pages--;
 		if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
-			break;
+			goto out;
 		cond_resched_lock(&hugetlb_lock);
 	}
+
+out:
+	/* Fully uncommit the reservation */
+	h->resv_huge_pages -= unused_resv_pages;
 }
 
 

From 575b1967e10a1f3038266244d2c7a3ca6b99fed8 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 10 Jan 2017 16:58:30 -0800
Subject: [PATCH 216/297] timerfd: export defines to userspace

Since userspace is expected to call timerfd syscalls directly with these
flags/ioctls, make sure we export them so they don't have to duplicate
the values themselves.

Link: http://lkml.kernel.org/r/20161219064052.7196-1-vapier@gentoo.org
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/timerfd.h      | 20 +-------------------
 include/uapi/linux/Kbuild    |  1 +
 include/uapi/linux/timerfd.h | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 19 deletions(-)
 create mode 100644 include/uapi/linux/timerfd.h

diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index bd36ce431e32..bab0b1ad0613 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -8,23 +8,7 @@
 #ifndef _LINUX_TIMERFD_H
 #define _LINUX_TIMERFD_H
 
-/* For O_CLOEXEC and O_NONBLOCK */
-#include <linux/fcntl.h>
-
-/* For _IO helpers */
-#include <linux/ioctl.h>
-
-/*
- * CAREFUL: Check include/asm-generic/fcntl.h when defining
- * new flags, since they might collide with O_* ones. We want
- * to re-use O_* flags that couldn't possibly have a meaning
- * from eventfd, in order to leave a free define-space for
- * shared O_* flags.
- */
-#define TFD_TIMER_ABSTIME (1 << 0)
-#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
-#define TFD_CLOEXEC O_CLOEXEC
-#define TFD_NONBLOCK O_NONBLOCK
+#include <uapi/linux/timerfd.h>
 
 #define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
 /* Flags for timerfd_create.  */
@@ -32,6 +16,4 @@
 /* Flags for timerfd_settime.  */
 #define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
 
-#define TFD_IOC_SET_TICKS	_IOW('T', 0, u64)
-
 #endif /* _LINUX_TIMERFD_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index a8b93e685239..f330ba4547cf 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -414,6 +414,7 @@ header-y += telephony.h
 header-y += termios.h
 header-y += thermal.h
 header-y += time.h
+header-y += timerfd.h
 header-y += times.h
 header-y += timex.h
 header-y += tiocl.h
diff --git a/include/uapi/linux/timerfd.h b/include/uapi/linux/timerfd.h
new file mode 100644
index 000000000000..6fcfaa8da173
--- /dev/null
+++ b/include/uapi/linux/timerfd.h
@@ -0,0 +1,36 @@
+/*
+ *  include/linux/timerfd.h
+ *
+ *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#ifndef _UAPI_LINUX_TIMERFD_H
+#define _UAPI_LINUX_TIMERFD_H
+
+#include <linux/types.h>
+
+/* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/fcntl.h>
+
+/* For _IO helpers */
+#include <linux/ioctl.h>
+
+/*
+ * CAREFUL: Check include/asm-generic/fcntl.h when defining
+ * new flags, since they might collide with O_* ones. We want
+ * to re-use O_* flags that couldn't possibly have a meaning
+ * from eventfd, in order to leave a free define-space for
+ * shared O_* flags.
+ *
+ * Also make sure to update the masks in include/linux/timerfd.h
+ * when adding new flags.
+ */
+#define TFD_TIMER_ABSTIME (1 << 0)
+#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
+#define TFD_CLOEXEC O_CLOEXEC
+#define TFD_NONBLOCK O_NONBLOCK
+
+#define TFD_IOC_SET_TICKS	_IOW('T', 0, __u64)
+
+#endif /* _UAPI_LINUX_TIMERFD_H */

From cd3776638003b3362d9d7d1f27bcb80c276e2c28 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:30 +0200
Subject: [PATCH 217/297] net/mlx5e: Properly handle offloading of source udp
 port for IP tunnels

We can offload the matching on source udp port of ip tunnels for
decapsulation. We can not offload setting source udp port for tunnels
as part of encapsulation. Fix both the code that deals with matching
offload (decap) and the code that deal with encap offload to align with
that.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Fixes: bbd00f7e2349 ('net/mlx5e: Add TC tunnel release action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f8829b517156..b60feceab63b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -239,10 +239,6 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
 			return -EOPNOTSUPP;
 
-		/* udp src port isn't supported */
-		if (memchr_inv(&mask->src, 0, sizeof(mask->src)))
-			return -EOPNOTSUPP;
-
 		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
 		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
 			parse_vxlan_attr(spec, f);
@@ -254,6 +250,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 udp_dport, ntohs(key->dst));
 
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+			 udp_sport, ntohs(mask->src));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+			 udp_sport, ntohs(key->src));
 	} else { /* udp dst port must be given */
 			return -EOPNOTSUPP;
 	}
@@ -796,6 +796,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
 		return -EOPNOTSUPP;
 
+	/* setting udp src port isn't supported */
+	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src)))
+		return -EOPNOTSUPP;
+
 	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
 	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
 		info.tp_dst = key->tp_dst;

From 2fcd82e9be133e4ec777f66fd67a8fb8e7748b1b Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:31 +0200
Subject: [PATCH 218/297] net/mlx5e: Warn when rejecting offload attempts of IP
 tunnels

We silently reject offloading of IPv6 tunnels, non vxlan tunnels,
vxlan tunnels where the dst port to match is not provided, etc.

Be a bit more verbose and print a warning so the user better
realizes what went wrong here and can fix it.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Fixes: bbd00f7e2349 ('net/mlx5e: Add TC tunnel release action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 30 +++++++++++++++----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index b60feceab63b..d2fc055f054a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -237,13 +237,16 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 
 		/* Full udp dst port must be given */
 		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
-			return -EOPNOTSUPP;
+			goto vxlan_match_offload_err;
 
 		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
 		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
 			parse_vxlan_attr(spec, f);
-		else
+		else {
+			netdev_warn(priv->netdev,
+				    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
 			return -EOPNOTSUPP;
+		}
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 udp_dport, ntohs(mask->dst));
@@ -255,7 +258,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 udp_sport, ntohs(key->src));
 	} else { /* udp dst port must be given */
-			return -EOPNOTSUPP;
+vxlan_match_offload_err:
+		netdev_warn(priv->netdev,
+			    "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
+		return -EOPNOTSUPP;
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
@@ -346,6 +352,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 			if (parse_tunnel_attr(priv, spec, f))
 				return -EOPNOTSUPP;
 			break;
+		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+			netdev_warn(priv->netdev,
+				    "IPv6 tunnel decap offload isn't supported\n");
 		default:
 			return -EOPNOTSUPP;
 		}
@@ -792,13 +801,17 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	int tunnel_type;
 	int err;
 
-	/* udp dst port must be given */
+	/* udp dst port must be set */
 	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
-		return -EOPNOTSUPP;
+		goto vxlan_encap_offload_err;
 
 	/* setting udp src port isn't supported */
-	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src)))
+	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
+vxlan_encap_offload_err:
+		netdev_warn(priv->netdev,
+			    "must set udp dst port and not set udp src port\n");
 		return -EOPNOTSUPP;
+	}
 
 	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
 	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
@@ -806,6 +819,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 		info.tun_id = tunnel_id_to_key32(key->tun_id);
 		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
 	} else {
+		netdev_warn(priv->netdev,
+			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
 		return -EOPNOTSUPP;
 	}
 
@@ -813,6 +828,9 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	case AF_INET:
 		info.daddr = key->u.ipv4.dst;
 		break;
+	case AF_INET6:
+		netdev_warn(priv->netdev,
+			    "IPv6 tunnel encap offload isn't supported\n");
 	default:
 		return -EOPNOTSUPP;
 	}

From a42485eb0ee458da3a0df82b0e42ab58ce76be05 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:32 +0200
Subject: [PATCH 219/297] net/mlx5e: TC ipv4 tunnel encap offload error flow
 fixes

When the route lookup fails we should return the actual error.

When the neigh isn't valid, we should return -EOPNOTSUPP as done
in similar cases along the code.

When the offload can't take place as of invalid neigh etc, we
must release the neigh.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index d2fc055f054a..b62f06f3f7e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -656,17 +656,14 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 
 #if IS_ENABLED(CONFIG_INET)
 	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
-	if (IS_ERR(rt)) {
-		pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr);
-		return -EOPNOTSUPP;
-	}
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 #else
 	return -EOPNOTSUPP;
 #endif
 
 	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
-		pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n",
-			__func__);
+		pr_warn("%s: can't offload, devices not on same HW e-switch\n", __func__);
 		ip_rt_put(rt);
 		return -EOPNOTSUPP;
 	}
@@ -727,8 +724,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 					  struct net_device **out_dev)
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	struct neighbour *n = NULL;
 	struct flowi4 fl4 = {};
-	struct neighbour *n;
 	char *encap_header;
 	int encap_size;
 	__be32 saddr;
@@ -759,7 +756,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	e->out_dev = *out_dev;
 
 	if (!(n->nud_state & NUD_VALID)) {
-		err = -ENOTSUPP;
+		pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
+		err = -EOPNOTSUPP;
 		goto out;
 	}
 
@@ -781,6 +779,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
 			       encap_size, encap_header, &e->encap_id);
 out:
+	if (err && n)
+		neigh_release(n);
 	kfree(encap_header);
 	return err;
 }

From 2e72eb438ce5ea9fa118edfd9a5f628c2a69111a Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:33 +0200
Subject: [PATCH 220/297] net/mlx5e: Properly get address type of encapsulation
 IP headers

As done elsewhere in our TC/flower offload code, the address type of
the encapsulation IP headers should be realized accroding to the
addr_type field of the encapsulation control dissector key, do that.

Fixes: bbd00f7e2349 ('net/mlx5e: Add TC tunnel release action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index b62f06f3f7e0..9cfddd9fc097 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -225,6 +225,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				       outer_headers);
 
+	struct flow_dissector_key_control *enc_control =
+		skb_flow_dissector_target(f->dissector,
+					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
+					  f->key);
+
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
 		struct flow_dissector_key_ports *key =
 			skb_flow_dissector_target(f->dissector,
@@ -264,7 +269,7 @@ vxlan_match_offload_err:
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 		struct flow_dissector_key_ipv4_addrs *key =
 			skb_flow_dissector_target(f->dissector,
 						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -286,10 +291,10 @@ vxlan_match_offload_err:
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
 			 ntohl(key->dst));
-	}
 
-	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
-	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+	}
 
 	/* Enforce DMAC when offloading incoming tunneled flows.
 	 * Flow counters require a match on the DMAC.

From 0827444d052ba5347900376dbdbf5d9065d091d4 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:34 +0200
Subject: [PATCH 221/297] net/mlx5e: Set inline mode requirements for matching
 on IP fragments

For e-switch level matching on packets being an IP fragment, we
need to make sure the source vport inline mode is L3, fix that.

Fixes: 3f7d0eb42d59 ('net/mlx5e: Offload TC matching on packets being IP fragments')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9cfddd9fc097..a35fa1eb0694 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -389,6 +389,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
 				 key->flags & FLOW_DIS_IS_FRAGMENT);
+
+			/* the HW doesn't need L3 inline to match on frag=no */
+			if (key->flags & FLOW_DIS_IS_FRAGMENT)
+				*min_inline = MLX5_INLINE_MODE_IP;
 		}
 	}
 

From a757d108dc1a053722215ee89116f8af9bba1525 Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:35 +0200
Subject: [PATCH 222/297] net/mlx5e: Fix kbuild warnings for uninitialized
 parameters

kbuild warn about parameters that may be used uninitialized, fix it.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a35fa1eb0694..5dbc81de34ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -737,8 +737,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	struct flowi4 fl4 = {};
 	char *encap_header;
 	int encap_size;
-	__be32 saddr;
-	int ttl;
+	__be32 saddr = 0;
+	int ttl = 0;
 	int err;
 
 	encap_header = kzalloc(max_encap_size, GFP_KERNEL);

From 5e86397abe10aa4c884478a45e9a35b6a37d8d5d Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:36 +0200
Subject: [PATCH 223/297] net/mlx5e: Properly handle FW errors while adding TC
 rules

When the firmware returns an error (common example is an attempt to
add twice the same rule which is refused by the some FWs), we are not
properly derefing/cleaning few resources allocated on the way.
Examples are vport vlan deref under eswitch vlan offloads, and encap
entry/neighbour deref under eswitch encapsulation offloads, fix that.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c    | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 5dbc81de34ee..118cea5e5489 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -161,15 +161,21 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 	}
 }
 
+/* we get here also when setting rule to the FW failed, etc. It means that the
+ * flow rule itself might not exist, but some offloading related to the actions
+ * should be cleaned.
+ */
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_fc *counter = NULL;
 
-	counter = mlx5_flow_rule_counter(flow->rule);
-
-	mlx5_del_flow_rules(flow->rule);
+	if (!IS_ERR(flow->rule)) {
+		counter = mlx5_flow_rule_counter(flow->rule);
+		mlx5_del_flow_rules(flow->rule);
+		mlx5_fc_destroy(priv->mdev, counter);
+	}
 
 	if (esw && esw->mode == SRIOV_OFFLOADS) {
 		mlx5_eswitch_del_vlan_action(esw, flow->attr);
@@ -177,8 +183,6 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			mlx5e_detach_encap(priv, flow);
 	}
 
-	mlx5_fc_destroy(priv->mdev, counter);
-
 	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
 		mlx5_destroy_flow_table(priv->fs.tc.t);
 		priv->fs.tc.t = NULL;
@@ -1017,7 +1021,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 
 	if (IS_ERR(flow->rule)) {
 		err = PTR_ERR(flow->rule);
-		goto err_free;
+		goto err_del_rule;
 	}
 
 	err = rhashtable_insert_fast(&tc->ht, &flow->node,
@@ -1028,7 +1032,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	goto out;
 
 err_del_rule:
-	mlx5_del_flow_rules(flow->rule);
+	mlx5e_tc_del_flow(priv, flow);
 
 err_free:
 	kfree(flow);

From 3deef8cea3efcaeeae240bb00541de66abb9bfa0 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:37 +0200
Subject: [PATCH 224/297] net/mlx5e: Un-register uplink representor on
 nic_disable

The code before this patch registered uplink e-Switch representor
on nic_enable and unregistered on nic_cleanup, the right place
for this unregister is in nic_disable.

Fixes: 127ea380acc9 ("net/mlx5: Add Representors registration API")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1236b27b1493..2b7dd315020c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3675,14 +3675,8 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-
 	mlx5e_vxlan_cleanup(priv);
 
-	if (MLX5_CAP_GEN(mdev, vport_group_manager))
-		mlx5_eswitch_unregister_vport_rep(esw, 0);
-
 	if (priv->xdp_prog)
 		bpf_prog_put(priv->xdp_prog);
 }
@@ -3807,9 +3801,14 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 
 static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
 	queue_work(priv->wq, &priv->set_rx_mode_work);
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		mlx5_eswitch_unregister_vport_rep(esw, 0);
 	mlx5e_disable_async_events(priv);
-	mlx5_lag_remove(priv->mdev);
+	mlx5_lag_remove(mdev);
 }
 
 static const struct mlx5e_profile mlx5e_nic_profile = {

From 0bbcc0a8fc394d01988fe0263ccf7fddb77a12c3 Mon Sep 17 00:00:00 2001
From: Gil Rockah <gilr@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:38 +0200
Subject: [PATCH 225/297] net/mlx5e: Remove WARN_ONCE from adaptive moderation
 code

When trying to do interface down or changing interface configuration
under heavy traffic, some of the adaptive moderation corner cases can
occur and leave a WARN_ONCE call trace in the kernel log.

Those WARN_ONCE are meant for debug only, and should have been inserted
only under debug. We avoid such call traces by removing those WARN_ONCE.

Fixes: cb3c7fd4f839 ("net/mlx5e: Support adaptive RX coalescing")
Signed-off-by: Gil Rockah <gilr@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
index 1fffe48a93cc..cbfac06b7ffd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
@@ -109,7 +109,6 @@ static bool mlx5e_am_on_top(struct mlx5e_rx_am *am)
 	switch (am->tune_state) {
 	case MLX5E_AM_PARKING_ON_TOP:
 	case MLX5E_AM_PARKING_TIRED:
-		WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n");
 		return true;
 	case MLX5E_AM_GOING_RIGHT:
 		return (am->steps_left > 1) && (am->steps_right == 1);
@@ -123,7 +122,6 @@ static void mlx5e_am_turn(struct mlx5e_rx_am *am)
 	switch (am->tune_state) {
 	case MLX5E_AM_PARKING_ON_TOP:
 	case MLX5E_AM_PARKING_TIRED:
-		WARN_ONCE(true, "mlx5e_am_turn: PARKING\n");
 		break;
 	case MLX5E_AM_GOING_RIGHT:
 		am->tune_state = MLX5E_AM_GOING_LEFT;
@@ -144,7 +142,6 @@ static int mlx5e_am_step(struct mlx5e_rx_am *am)
 	switch (am->tune_state) {
 	case MLX5E_AM_PARKING_ON_TOP:
 	case MLX5E_AM_PARKING_TIRED:
-		WARN_ONCE(true, "mlx5e_am_step: PARKING\n");
 		break;
 	case MLX5E_AM_GOING_RIGHT:
 		if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1))
@@ -282,10 +279,8 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
 	u32 delta_us = ktime_us_delta(end->time, start->time);
 	unsigned int npkts = end->pkt_ctr - start->pkt_ctr;
 
-	if (!delta_us) {
-		WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n");
+	if (!delta_us)
 		return;
-	}
 
 	curr_stats->ppms =            (npkts * USEC_PER_MSEC) / delta_us;
 	curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us;

From 5e44fca5047054f1762813751626b5245e0da022 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Tue, 10 Jan 2017 22:33:39 +0200
Subject: [PATCH 226/297] net/mlx5: Only cancel recovery work when cleaning up
 device

Do not attempt to drain the health workqueue when unloading the device in
the recovery flow, this can cause a deadlock when the recovery work
tries to cancel itself with sync.

Because the work is no longer unconditionally canceled when unloading, it
must be explicitly canceled in the AER flow.

fixes: 689a248df83b ("net/mlx5: Cancel recovery work in remove flow")
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6547f22e6b9b..d01e9f21d469 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1195,7 +1195,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 {
 	int err = 0;
 
-	mlx5_drain_health_wq(dev);
+	if (cleanup)
+		mlx5_drain_health_wq(dev);
 
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
@@ -1359,9 +1360,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
 	mlx5_enter_error_state(dev);
 	mlx5_unload_one(dev, priv, false);
-	/* In case of kernel call save the pci state */
+	/* In case of kernel call save the pci state and drain the health wq */
 	if (state) {
 		pci_save_state(pdev);
+		mlx5_drain_health_wq(dev);
 		mlx5_pci_disable_device(dev);
 	}
 

From 7ee7f45a763bd68c3a606595a8c1bb08c3e6146b Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Wed, 11 Jan 2017 01:27:21 +0200
Subject: [PATCH 227/297] mei: bus: enable OS version only for SPT and newer

Sending OS version for support of TPM2_ChangeEPS() is required only
for SPT FW (HMB version 2.0) and newer.
On older platforms the command should be just ignored by the firmware
but some older platforms misbehave so it's safer to send the command
only if required.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=192051
Fixes: 7279b238bade (mei: send OS type to the FW)
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Tested-by: Jan Niehusmann <jan@gondor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus-fixup.c | 3 +++
 drivers/misc/mei/debugfs.c   | 2 ++
 drivers/misc/mei/hbm.c       | 4 ++++
 drivers/misc/mei/hw.h        | 6 ++++++
 drivers/misc/mei/mei_dev.h   | 2 ++
 5 files changed, 17 insertions(+)

diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 18e05ca7584f..3600c9993a98 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -152,6 +152,9 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev)
 {
 	int ret;
 
+	if (!cldev->bus->hbm_f_os_supported)
+		return;
+
 	ret = mei_cldev_enable(cldev);
 	if (ret)
 		return;
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index c6c051b52f55..c6217a4993ad 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -180,6 +180,8 @@ static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf,
 				 dev->hbm_f_ev_supported);
 		pos += scnprintf(buf + pos, bufsz - pos, "\tFA: %01d\n",
 				 dev->hbm_f_fa_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tOS: %01d\n",
+				 dev->hbm_f_os_supported);
 	}
 
 	pos += scnprintf(buf + pos, bufsz - pos, "pg:  %s, %s\n",
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index dd7f15a65eed..25b4a1ba522d 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -989,6 +989,10 @@ static void mei_hbm_config_features(struct mei_device *dev)
 	/* Fixed Address Client Support */
 	if (dev->version.major_version >= HBM_MAJOR_VERSION_FA)
 		dev->hbm_f_fa_supported = 1;
+
+	/* OS ver message Support */
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_OS)
+		dev->hbm_f_os_supported = 1;
 }
 
 /**
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
index 9daf3f9aed25..e1e4d47d4d7d 100644
--- a/drivers/misc/mei/hw.h
+++ b/drivers/misc/mei/hw.h
@@ -76,6 +76,12 @@
 #define HBM_MINOR_VERSION_FA               0
 #define HBM_MAJOR_VERSION_FA               2
 
+/*
+ * MEI version with OS ver message support
+ */
+#define HBM_MINOR_VERSION_OS               0
+#define HBM_MAJOR_VERSION_OS               2
+
 /* Host bus message command opcode */
 #define MEI_HBM_CMD_OP_MSK                  0x7f
 /* Host bus message command RESPONSE */
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 699693cd8c59..8dadb98662a9 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -406,6 +406,7 @@ const char *mei_pg_state_str(enum mei_pg_state state);
  * @hbm_f_ev_supported  : hbm feature event notification
  * @hbm_f_fa_supported  : hbm feature fixed address client
  * @hbm_f_ie_supported  : hbm feature immediate reply to enum request
+ * @hbm_f_os_supported  : hbm feature support OS ver message
  *
  * @me_clients_rwsem: rw lock over me_clients list
  * @me_clients  : list of FW clients
@@ -487,6 +488,7 @@ struct mei_device {
 	unsigned int hbm_f_ev_supported:1;
 	unsigned int hbm_f_fa_supported:1;
 	unsigned int hbm_f_ie_supported:1;
+	unsigned int hbm_f_os_supported:1;
 
 	struct rw_semaphore me_clients_rwsem;
 	struct list_head me_clients;

From 488debb9971bc7d0edd6d8080ba78ca02a04f6c4 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 5 Jan 2017 17:15:01 +0000
Subject: [PATCH 228/297] drivers: char: mem: Fix thinkos in kmem address
 checks

When borrowing the pfn_valid() check from mmap_kmem(), somebody managed
to get physical and virtual addresses spectacularly muddled up, such
that we've ended up with checks for one being the other. Whilst this
does indeed prevent out-of-bounds accesses crashing, on most systems
it also prevents the more desirable use-case of working at all ever.

Check the *virtual* offset correctly for what it is. Furthermore, do
so in the right place - a read or write may span multiple pages, so a
single up-front check is insufficient. High memory accesses already
have a similar validity check just before the copy_to_user() call, so
just make the low memory path fully consistent with that.

Reported-by: Jason A. Donenfeld <Jason@zx2c4.com>
CC: stable@vger.kernel.org
Fixes: 148a1bc84398 ("drivers: char: mem: Check {read,write}_kmem() addresses")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/mem.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 5bb1985ec484..6d9cc2d39d22 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -381,9 +381,6 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
 	char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
 	int err = 0;
 
-	if (!pfn_valid(PFN_DOWN(p)))
-		return -EIO;
-
 	read = 0;
 	if (p < (unsigned long) high_memory) {
 		low_count = count;
@@ -412,6 +409,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
 			 * by the kernel or data corruption may occur
 			 */
 			kbuf = xlate_dev_kmem_ptr((void *)p);
+			if (!virt_addr_valid(kbuf))
+				return -ENXIO;
 
 			if (copy_to_user(buf, kbuf, sz))
 				return -EFAULT;
@@ -482,6 +481,8 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
 		 * corruption may occur.
 		 */
 		ptr = xlate_dev_kmem_ptr((void *)p);
+		if (!virt_addr_valid(ptr))
+			return -ENXIO;
 
 		copied = copy_from_user(ptr, buf, sz);
 		if (copied) {
@@ -512,9 +513,6 @@ static ssize_t write_kmem(struct file *file, const char __user *buf,
 	char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
 	int err = 0;
 
-	if (!pfn_valid(PFN_DOWN(p)))
-		return -EIO;
-
 	if (p < (unsigned long) high_memory) {
 		unsigned long to_write = min_t(unsigned long, count,
 					       (unsigned long)high_memory - p);

From 89d8232411a85b9a6b12fd5da4d07d8a138a8e0c Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 13 Dec 2016 17:27:56 +0100
Subject: [PATCH 229/297] tty/serial: atmel_serial: BUG: stop DMA from
 transmitting in stop_tx

If we don't disable the transmitter in atmel_stop_tx, the DMA buffer
continues to send data until it is emptied.
This cause problems with the flow control (CTS is asserted and data are
still sent).

So, disabling the transmitter in atmel_stop_tx is a sane thing to do.

Tested on at91sam9g35-cm(DMA)
Tested for regressions on sama5d2-xplained(Fifo) and at91sam9g20ek(PDC)

Cc: <stable@vger.kernel.org> (beware, this won't apply before 4.3)
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 168b10cad47b..f9d42de5ab2d 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -481,6 +481,14 @@ static void atmel_stop_tx(struct uart_port *port)
 		/* disable PDC transmit */
 		atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
 	}
+
+	/*
+	 * Disable the transmitter.
+	 * This is mandatory when DMA is used, otherwise the DMA buffer
+	 * is fully transmitted.
+	 */
+	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS);
+
 	/* Disable interrupts */
 	atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
 
@@ -513,6 +521,9 @@ static void atmel_start_tx(struct uart_port *port)
 
 	/* Enable interrupts */
 	atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask);
+
+	/* re-enable the transmitter */
+	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
 }
 
 /*

From b389f173aaa1204d6dc1f299082a162eb0491545 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 6 Dec 2016 13:05:33 +0100
Subject: [PATCH 230/297] tty/serial: atmel: RS485 half duplex w/DMA: enable RX
 after TX is done

When using RS485 in half duplex, RX should be enabled when TX is
finished, and stopped when TX starts.

Before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half
duplex with DMA"), RX was not disabled in atmel_start_tx() if the DMA
was used. So, collisions could happened.

But disabling RX in atmel_start_tx() uncovered another bug:
RX was enabled again in the wrong place (in atmel_tx_dma) instead of
being enabled when TX is finished (in atmel_complete_tx_dma), so the
transmission simply stopped.

This bug was not triggered before commit 0058f0871efe7b01c6
("tty/serial: atmel: fix RS485 half duplex with DMA") because RX was
never disabled before.

Moving atmel_start_rx() in atmel_complete_tx_dma() corrects the problem.

Cc: stable@vger.kernel.org
Reported-by: Gil Weber <webergil@gmail.com>
Fixes: 0058f0871efe7b01c6
Tested-by: Gil Weber <webergil@gmail.com>
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index f9d42de5ab2d..fabbe76203bb 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -809,6 +809,11 @@ static void atmel_complete_tx_dma(void *arg)
 	 */
 	if (!uart_circ_empty(xmit))
 		atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx);
+	else if ((port->rs485.flags & SER_RS485_ENABLED) &&
+		 !(port->rs485.flags & SER_RS485_RX_DURING_TX)) {
+		/* DMA done, stop TX, start RX for RS485 */
+		atmel_start_rx(port);
+	}
 
 	spin_unlock_irqrestore(&port->lock, flags);
 }
@@ -911,12 +916,6 @@ static void atmel_tx_dma(struct uart_port *port)
 		desc->callback = atmel_complete_tx_dma;
 		desc->callback_param = atmel_port;
 		atmel_port->cookie_tx = dmaengine_submit(desc);
-
-	} else {
-		if (port->rs485.flags & SER_RS485_ENABLED) {
-			/* DMA done, stop TX, start RX for RS485 */
-			atmel_start_rx(port);
-		}
 	}
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)

From c130b666a9a711f985a0a44b58699ebe14bb7245 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
Date: Wed, 28 Dec 2016 16:42:00 -0200
Subject: [PATCH 231/297] 8250_pci: Fix potential use-after-free in error path

Commit f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during
PCI error recovery") introduces a potential use-after-free in case the
pciserial_init_ports call in serial8250_io_resume fails, which may
happen if a memory allocation fails or if the .init quirk failed for
whatever reason).  If this happen, further pci_get_drvdata will return a
pointer to freed memory.

This patch reworks the PCI recovery resume hook to restore the old priv
structure in this case, which should be ok, since the ports were already
detached. Such error during recovery causes us to give up on the
recovery.

Fixes: f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during
  PCI error recovery")
Reported-by: Michal Suchanek <msuchanek@suse.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index aa0166b6d450..116436b7fa52 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5642,17 +5642,15 @@ static pci_ers_result_t serial8250_io_slot_reset(struct pci_dev *dev)
 static void serial8250_io_resume(struct pci_dev *dev)
 {
 	struct serial_private *priv = pci_get_drvdata(dev);
-	const struct pciserial_board *board;
+	struct serial_private *new;
 
 	if (!priv)
 		return;
 
-	board = priv->board;
-	kfree(priv);
-	priv = pciserial_init_ports(dev, board);
-
-	if (!IS_ERR(priv)) {
-		pci_set_drvdata(dev, priv);
+	new = pciserial_init_ports(dev, priv->board);
+	if (!IS_ERR(new)) {
+		pci_set_drvdata(dev, new);
+		kfree(priv);
 	}
 }
 

From 2bed8a8e70729f996af92042d3ad0f11870acc1f Mon Sep 17 00:00:00 2001
From: Daniel Jedrychowski <avistel@gmail.com>
Date: Mon, 12 Dec 2016 09:18:28 +1100
Subject: [PATCH 232/297] Clearing FIFOs in RS485 emulation mode causes
 subsequent transmits to break

When in RS485 emulation mode, __do_stop_tx_rs485() calls
serial8250_clear_fifos().  This not only clears the FIFOs, but also sets
all bits in their control register (UART_FCR) to 0.

One of the effects of this is the disabling of the FIFOs, which turns
them into single-byte holding registers.  The rest of the driver doesn't
know this, which results in the lions share of characters passed into a
write call to be dropped.

(I can supply logic analyzer screenshots if necessary)

This fix replaces the serial8250_clear_fifos() call to
serial8250_clear_and_reinit_fifos() - this prevents the "dropped
characters" issue from manifesting again while retaining the requirement
of clearing the RX FIFO after transmission if the SER_RS485_RX_DURING_TX
flag is disabled.

Signed-off-by: Daniel Jedrychowski <avistel@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index fe4399b41df6..c13fec451d03 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1413,7 +1413,7 @@ static void __do_stop_tx_rs485(struct uart_8250_port *p)
 	 * Enable previously disabled RX interrupts.
 	 */
 	if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) {
-		serial8250_clear_fifos(p);
+		serial8250_clear_and_reinit_fifos(p);
 
 		p->ier |= UART_IER_RLSI | UART_IER_RDI;
 		serial_port_out(&p->port, UART_IER, p->ier);

From 6741f551a0b26479de2532ffa43a366747e6dbf3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 11 Dec 2016 10:05:49 +0800
Subject: [PATCH 233/297] Revert "tty: serial: 8250: add CON_CONSDEV to flags"

This commit needs to be reverted because it prevents people from
using the serial console as a secondary console with input being
directed to tty0.

IOW, if you boot with console=ttyS0 console=tty0 then all kernels
prior to this commit will produce output on both ttyS0 and tty0
but input will only be taken from tty0.  With this patch the serial
console will always be the primary console instead of tty0,
potentially preventing people from getting into their machines in
emergency situations.

Fixes: d03516df8375 ("tty: serial: 8250: add CON_CONSDEV to flags")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 61569a765d9e..76e03a7de9cc 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -675,7 +675,7 @@ static struct console univ8250_console = {
 	.device		= uart_console_device,
 	.setup		= univ8250_console_setup,
 	.match		= univ8250_console_match,
-	.flags		= CON_PRINTBUFFER | CON_ANYTIME | CON_CONSDEV,
+	.flags		= CON_PRINTBUFFER | CON_ANYTIME,
 	.index		= -1,
 	.data		= &serial8250_reg,
 };

From 5b11ebedd6a8bb4271b796e498cd15c0fe1133b6 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sat, 3 Dec 2016 16:56:49 +0800
Subject: [PATCH 234/297] extcon: return error code on failure

Function get_zeroed_page() returns a NULL pointer if there is no enough
memory. In function extcon_sync(), it returns 0 if the call to
get_zeroed_page() fails. The return value 0 indicates success in the
context, which is incosistent with the execution status. This patch
fixes the bug by returning -ENOMEM.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188611

Signed-off-by: Pan Bian <bianpan2016@163.com>
Fixes: a580982f0836e
Cc: stable <stable@vger.kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/extcon/extcon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
index 78298460d168..7c1e3a7b14e0 100644
--- a/drivers/extcon/extcon.c
+++ b/drivers/extcon/extcon.c
@@ -453,7 +453,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id)
 		dev_err(&edev->dev, "out of memory in extcon_set_state\n");
 		kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE);
 
-		return 0;
+		return -ENOMEM;
 	}
 
 	length = name_show(&edev->dev, NULL, prop_buf);

From 0fa2c8eb270413160557babda519aa3c21e2bfaf Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 2 Dec 2016 16:23:55 +0000
Subject: [PATCH 235/297] ppdev: don't print a free'd string

A previous fix of a memory leak now prints the string 'name'
that was previously free'd.  Fix this by free'ing the string
at the end of the function and adding an error exit path for
the error conditions.

CoverityScan CID#1384523 ("Use after free")

Fixes: 2bd362d5f45c1 ("ppdev: fix memory leak")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/ppdev.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 02819e0703c8..87885d146dbb 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -290,6 +290,7 @@ static int register_device(int minor, struct pp_struct *pp)
 	struct pardevice *pdev = NULL;
 	char *name;
 	struct pardev_cb ppdev_cb;
+	int rc = 0;
 
 	name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
 	if (name == NULL)
@@ -298,8 +299,8 @@ static int register_device(int minor, struct pp_struct *pp)
 	port = parport_find_number(minor);
 	if (!port) {
 		pr_warn("%s: no associated port!\n", name);
-		kfree(name);
-		return -ENXIO;
+		rc = -ENXIO;
+		goto err;
 	}
 
 	memset(&ppdev_cb, 0, sizeof(ppdev_cb));
@@ -308,16 +309,18 @@ static int register_device(int minor, struct pp_struct *pp)
 	ppdev_cb.private = pp;
 	pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
 	parport_put_port(port);
-	kfree(name);
 
 	if (!pdev) {
 		pr_warn("%s: failed to register device!\n", name);
-		return -ENXIO;
+		rc = -ENXIO;
+		goto err;
 	}
 
 	pp->pdev = pdev;
 	dev_dbg(&pdev->dev, "registered pardevice\n");
-	return 0;
+err:
+	kfree(name);
+	return rc;
 }
 
 static enum ieee1284_phase init_phase(int mode)

From 802c03881f29844af0252b6e22be5d2f65f93fd0 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 6 Jan 2017 02:14:16 +0900
Subject: [PATCH 236/297] sysrq: attach sysrq handler correctly for 32-bit
 kernel

The sysrq input handler should be attached to the input device which has
a left alt key.

On 32-bit kernels, some input devices which has a left alt key cannot
attach sysrq handler.  Because the keybit bitmap in struct input_device_id
for sysrq is not correctly initialized.  KEY_LEFTALT is 56 which is
greater than BITS_PER_LONG on 32-bit kernels.

I found this problem when using a matrix keypad device which defines
a KEY_LEFTALT (56) but doesn't have a KEY_O (24 == 56%32).

Cc: Jiri Slaby <jslaby@suse.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/sysrq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 52bbd27e93ae..701c085bb19b 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -946,8 +946,8 @@ static const struct input_device_id sysrq_ids[] = {
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { BIT_MASK(KEY_LEFTALT) },
+		.evbit = { [BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_LEFTALT)] = BIT_MASK(KEY_LEFTALT) },
 	},
 	{ },
 };

From 546cf3ef9c92b76ff0037c871b939e63caea98b3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 26 Dec 2016 09:58:34 -0800
Subject: [PATCH 237/297] auxdisplay: fix new ht16k33 build errors

Fix build errors caused by selecting incorrect kconfig symbols.

drivers/built-in.o:(.data+0x19cec): undefined reference to `sys_fillrect'
drivers/built-in.o:(.data+0x19cf0): undefined reference to `sys_copyarea'
drivers/built-in.o:(.data+0x19cf4): undefined reference to `sys_imageblit'

Fixes: 31114fa95bdb (auxdisplay: ht16k33: select framebuffer helper modules)

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Acked-by: Robin van der Gracht <robin@protonic.nl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/auxdisplay/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index 4ef4c5caed4f..8a8e403644d6 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -132,9 +132,9 @@ config HT16K33
 	tristate "Holtek Ht16K33 LED controller with keyscan"
 	depends on FB && OF && I2C && INPUT
 	select FB_SYS_FOPS
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
+	select FB_SYS_FILLRECT
+	select FB_SYS_COPYAREA
+	select FB_SYS_IMAGEBLIT
 	select INPUT_MATRIXKMAP
 	select FB_BACKLIGHT
 	help

From 24b91e360ef521a2808771633d76ebc68bd5604b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 4 Jan 2017 15:12:04 +0100
Subject: [PATCH 238/297] nohz: Fix collision between tick and other hrtimers

When the tick is stopped and an interrupt occurs afterward, we check on
that interrupt exit if the next tick needs to be rescheduled. If it
doesn't need any update, we don't want to do anything.

In order to check if the tick needs an update, we compare it against the
clockevent device deadline. Now that's a problem because the clockevent
device is at a lower level than the tick itself if it is implemented
on top of hrtimer.

Every hrtimer share this clockevent device. So comparing the next tick
deadline against the clockevent device deadline is wrong because the
device may be programmed for another hrtimer whose deadline collides
with the tick. As a result we may end up not reprogramming the tick
accidentally.

In a worst case scenario under full dynticks mode, the tick stops firing
as it is supposed to every 1hz, leaving /proc/stat stalled:

      Task in a full dynticks CPU
      ----------------------------

      * hrtimer A is queued 2 seconds ahead
      * the tick is stopped, scheduled 1 second ahead
      * tick fires 1 second later
      * on tick exit, nohz schedules the tick 1 second ahead but sees
        the clockevent device is already programmed to that deadline,
        fooled by hrtimer A, the tick isn't rescheduled.
      * hrtimer A is cancelled before its deadline
      * tick never fires again until an interrupt happens...

In order to fix this, store the next tick deadline to the tick_sched
local structure and reuse that value later to check whether we need to
reprogram the clock after an interrupt.

On the other hand, ts->sleep_length still wants to know about the next
clock event and not just the tick, so we want to improve the related
comment to avoid confusion.

Reported-by: James Hartsock <hartsjc@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Rik van Riel <riel@redhat.com>
Link: http://lkml.kernel.org/r/1483539124-5693-1-git-send-email-fweisbec@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 9 +++++++--
 kernel/time/tick-sched.h | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2c115fdab397..74e0388cc88d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -767,7 +767,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	tick = expires;
 
 	/* Skip reprogram of event if its not changed */
-	if (ts->tick_stopped && (expires == dev->next_event))
+	if (ts->tick_stopped && (expires == ts->next_tick))
 		goto out;
 
 	/*
@@ -787,6 +787,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
+	ts->next_tick = tick;
+
 	/*
 	 * If the expiration time == KTIME_MAX, then we simply stop
 	 * the tick timer.
@@ -802,7 +804,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	else
 		tick_program_event(tick, 1);
 out:
-	/* Update the estimated sleep length */
+	/*
+	 * Update the estimated sleep length until the next timer
+	 * (not only the tick).
+	 */
 	ts->sleep_length = ktime_sub(dev->next_event, now);
 	return tick;
 }
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bf38226e5c17..075444e3d48e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -27,6 +27,7 @@ enum tick_nohz_mode {
  *			timer is modified for nohz sleeps. This is necessary
  *			to resume the tick timer operation in the timeline
  *			when the CPU returns from nohz sleep.
+ * @next_tick:		Next tick to be fired when in dynticks mode.
  * @tick_stopped:	Indicator that the idle tick has been stopped
  * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
  * @idle_calls:		Total number of idle calls
@@ -44,6 +45,7 @@ struct tick_sched {
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
 	ktime_t				last_tick;
+	ktime_t				next_tick;
 	int				inidle;
 	int				tick_stopped;
 	unsigned long			idle_jiffies;

From c8a6a09c1c617402cc9254b2bc8da359a0347d75 Mon Sep 17 00:00:00 2001
From: Augusto Mecking Caringi <augustocaringi@gmail.com>
Date: Tue, 10 Jan 2017 10:45:00 +0000
Subject: [PATCH 239/297] vme: Fix wrong pointer utilization in
 ca91cx42_slave_get
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ca91cx42_slave_get function, the value pointed by vme_base pointer is
set through:

*vme_base = ioread32(bridge->base + CA91CX42_VSI_BS[i]);

So it must be dereferenced to be used in calculation of pci_base:

*pci_base = (dma_addr_t)*vme_base + pci_offset;

This bug was caught thanks to the following gcc warning:

drivers/vme/bridges/vme_ca91cx42.c: In function ‘ca91cx42_slave_get’:
drivers/vme/bridges/vme_ca91cx42.c:467:14: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
*pci_base = (dma_addr_t)vme_base + pci_offset;

Signed-off-by: Augusto Mecking Caringi <augustocaringi@gmail.com>
Acked-By: Martyn Welch <martyn@welchs.me.uk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vme/bridges/vme_ca91cx42.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c
index 6b5ee896af63..7cc51223db1c 100644
--- a/drivers/vme/bridges/vme_ca91cx42.c
+++ b/drivers/vme/bridges/vme_ca91cx42.c
@@ -464,7 +464,7 @@ static int ca91cx42_slave_get(struct vme_slave_resource *image, int *enabled,
 	vme_bound = ioread32(bridge->base + CA91CX42_VSI_BD[i]);
 	pci_offset = ioread32(bridge->base + CA91CX42_VSI_TO[i]);
 
-	*pci_base = (dma_addr_t)vme_base + pci_offset;
+	*pci_base = (dma_addr_t)*vme_base + pci_offset;
 	*size = (unsigned long long)((vme_bound - *vme_base) + granularity);
 
 	*enabled = 0;

From 69d012345a1a32d3f03957f14d972efccc106a98 Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie.huang@arm.com>
Date: Wed, 11 Jan 2017 14:02:00 +0800
Subject: [PATCH 240/297] arm64: hugetlb: fix the wrong return value for
 huge_ptep_set_access_flags

In current code, the @changed always returns the last one's status for
the huge page with the contiguous bit set. This is really not what we
want. Even one of the PTEs is changed, we should tell it to the caller.

This patch fixes this issue.

Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
Cc: <stable@vger.kernel.org> # 4.5.x-
Signed-off-by: Huang Shijie <shijie.huang@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/hugetlbpage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 964b7549af5c..e25584d72396 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -239,7 +239,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
 					  *cpte, &pgsize);
 		for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) {
-			changed = ptep_set_access_flags(vma, addr, cpte,
+			changed |= ptep_set_access_flags(vma, addr, cpte,
 							pfn_pte(pfn,
 								hugeprot),
 							dirty);

From 2d5a9c72d0c4ac73cf97f4b7814ed6c44b1e49ae Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 6 Jan 2017 19:15:18 +0100
Subject: [PATCH 241/297] USB: serial: ch341: fix control-message error
 handling

A short control transfer would currently fail to be detected, something
which could lead to stale buffer data being used as valid input.

Check for short transfers, and make sure to log any transfer errors.

Note that this also avoids leaking heap data to user space (TIOCMGET)
and the remote device (break control).

Fixes: 6ce76104781a ("USB: Driver for CH341 USB-serial adaptor")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 8d7b0847109b..95aa5233726c 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -113,6 +113,8 @@ static int ch341_control_out(struct usb_device *dev, u8 request,
 	r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request,
 			    USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
 			    value, index, NULL, 0, DEFAULT_TIMEOUT);
+	if (r < 0)
+		dev_err(&dev->dev, "failed to send control message: %d\n", r);
 
 	return r;
 }
@@ -130,7 +132,20 @@ static int ch341_control_in(struct usb_device *dev,
 	r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request,
 			    USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
 			    value, index, buf, bufsize, DEFAULT_TIMEOUT);
-	return r;
+	if (r < bufsize) {
+		if (r >= 0) {
+			dev_err(&dev->dev,
+				"short control message received (%d < %u)\n",
+				r, bufsize);
+			r = -EIO;
+		}
+
+		dev_err(&dev->dev, "failed to receive control message: %d\n",
+			r);
+		return r;
+	}
+
+	return 0;
 }
 
 static int ch341_set_baudrate_lcr(struct usb_device *dev,
@@ -181,9 +196,9 @@ static int ch341_set_handshake(struct usb_device *dev, u8 control)
 
 static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv)
 {
+	const unsigned int size = 2;
 	char *buffer;
 	int r;
-	const unsigned size = 8;
 	unsigned long flags;
 
 	buffer = kmalloc(size, GFP_KERNEL);
@@ -194,14 +209,9 @@ static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv)
 	if (r < 0)
 		goto out;
 
-	/* setup the private status if available */
-	if (r == 2) {
-		r = 0;
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT;
-		spin_unlock_irqrestore(&priv->lock, flags);
-	} else
-		r = -EPROTO;
+	spin_lock_irqsave(&priv->lock, flags);
+	priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT;
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 out:	kfree(buffer);
 	return r;
@@ -211,9 +221,9 @@ out:	kfree(buffer);
 
 static int ch341_configure(struct usb_device *dev, struct ch341_private *priv)
 {
+	const unsigned int size = 2;
 	char *buffer;
 	int r;
-	const unsigned size = 8;
 
 	buffer = kmalloc(size, GFP_KERNEL);
 	if (!buffer)

From 6d6daa20945f3f598e56e18d1f926c08754f5801 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 5 Jan 2017 10:09:25 -0500
Subject: [PATCH 242/297] perf/x86/intel/uncore: Fix hardcoded socket 0
 assumption in the Haswell init code

hswep_uncore_cpu_init() uses a hardcoded physical package id 0 for the boot
cpu. This works as long as the boot CPU is actually on the physical package
0, which is normaly the case after power on / reboot.

But it fails with a NULL pointer dereference when a kdump kernel is started
on a secondary socket which has a different physical package id because the
locigal package translation for physical package 0 does not exist.

Use the logical package id of the boot cpu instead of hard coded 0.

[ tglx: Rewrote changelog once more ]

Fixes: cf6d445f6897 ("perf/x86/uncore: Track packages, not per CPU data")
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Harish Chegondi <harish.chegondi@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1483628965-2890-1-git-send-email-prarit@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/events/intel/uncore_snbep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index e6832be714bc..dae2fedc1601 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
 
 void hswep_uncore_cpu_init(void)
 {
-	int pkg = topology_phys_to_logical_pkg(0);
+	int pkg = boot_cpu_data.logical_proc_id;
 
 	if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;

From 7cfd5fd5a9813f1430290d20c0fead9b4582a307 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Jan 2017 19:52:43 -0800
Subject: [PATCH 243/297] gro: use min_t() in skb_gro_reset_offset()

On 32bit arches, (skb->end - skb->data) is not 'unsigned int',
so we shall use min_t() instead of min() to avoid a compiler error.

Fixes: 1272ce87fa01 ("gro: Enter slow-path if there is no tailroom")
Reported-by: kernel test robot <fengguang.wu@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 88d2907ca2cd..07b307b0b414 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4441,8 +4441,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
 	    pinfo->nr_frags &&
 	    !PageHighMem(skb_frag_page(frag0))) {
 		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
-		NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0),
-						  skb->end - skb->tail);
+		NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
+						    skb_frag_size(frag0),
+						    skb->end - skb->tail);
 	}
 }
 

From 73b351473547e543e9c8166dd67fd99c64c15b0b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Jan 2017 13:08:06 +0100
Subject: [PATCH 244/297] cgroup: move CONFIG_SOCK_CGROUP_DATA to init/Kconfig

We now 'select SOCK_CGROUP_DATA' but Kconfig complains that this is
not right when CONFIG_NET is disabled and there is no socket interface:

warning: (CGROUP_BPF) selects SOCK_CGROUP_DATA which has unmet direct dependencies (NET)

I don't know what the correct solution for this is, but simply removing
the dependency on NET from SOCK_CGROUP_DATA by moving it out of the
'if NET' section avoids the warning and does not produce other build
errors.

Fixes: 483c4933ea09 ("cgroup: Fix CGROUP_BPF config")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 init/Kconfig | 4 ++++
 net/Kconfig  | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 223b734abccd..e1a937348a3e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1176,6 +1176,10 @@ config CGROUP_DEBUG
 
 	  Say N.
 
+config SOCK_CGROUP_DATA
+	bool
+	default n
+
 endif # CGROUPS
 
 config CHECKPOINT_RESTORE
diff --git a/net/Kconfig b/net/Kconfig
index a1005007224c..a29bb4b41c50 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -258,10 +258,6 @@ config XPS
 config HWBM
        bool
 
-config SOCK_CGROUP_DATA
-	bool
-	default n
-
 config CGROUP_NET_PRIO
 	bool "Network priority cgroup"
 	depends on CGROUPS

From 7a18c5b9fb31a999afc62b0e60978aa896fc89e9 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 10 Jan 2017 14:37:35 -0800
Subject: [PATCH 245/297] net: ipv4: Fix multipath selection with vrf

fib_select_path does not call fib_select_multipath if oif is set in the
flow struct. For VRF use cases oif is always set, so multipath route
selection is bypassed. Use the FLOWI_FLAG_SKIP_NH_OIF to skip the oif
check similar to what is done in fib_table_lookup.

Add saddr and proto to the flow struct for the fib lookup done by the
VRF driver to better match hash computation for a flow.

Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c        | 2 ++
 net/ipv4/fib_semantics.c | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 23dfb0eac098..0a067708aa39 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -263,7 +263,9 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
 		.flowi4_iif = LOOPBACK_IFINDEX,
 		.flowi4_tos = RT_TOS(ip4h->tos),
 		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
+		.flowi4_proto = ip4h->protocol,
 		.daddr = ip4h->daddr,
+		.saddr = ip4h->saddr,
 	};
 	struct net *net = dev_net(vrf_dev);
 	struct rtable *rt;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 7a5b4c7d9a87..eba1546b5031 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1618,8 +1618,13 @@ void fib_select_multipath(struct fib_result *res, int hash)
 void fib_select_path(struct net *net, struct fib_result *res,
 		     struct flowi4 *fl4, int mp_hash)
 {
+	bool oif_check;
+
+	oif_check = (fl4->flowi4_oif == 0 ||
+		     fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
+	if (res->fi->fib_nhs > 1 && oif_check) {
 		if (mp_hash < 0)
 			mp_hash = get_hash_from_flowi4(fl4) >> 1;
 
@@ -1629,7 +1634,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
 #endif
 	if (!res->prefixlen &&
 	    res->table->tb_num_default > 1 &&
-	    res->type == RTN_UNICAST && !fl4->flowi4_oif)
+	    res->type == RTN_UNICAST && oif_check)
 		fib_select_default(fl4, res);
 
 	if (!fl4->saddr)

From eb004603c857f3e3bfcda437b6c68fd258c54960 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 10 Jan 2017 22:53:06 +0000
Subject: [PATCH 246/297] sctp: Fix spelling mistake: "Atempt" -> "Attempt"

Trivial fix to spelling mistake in WARN_ONCE message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/outqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e54082699520..34efaa4ef2f6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1048,7 +1048,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 			     (new_transport->state == SCTP_PF)))
 				new_transport = asoc->peer.active_path;
 			if (new_transport->state == SCTP_UNCONFIRMED) {
-				WARN_ONCE(1, "Atempt to send packet on unconfirmed path.");
+				WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
 				sctp_chunk_fail(chunk, 0);
 				sctp_chunk_free(chunk);
 				continue;

From a13c06525ab9ff442924e67df9393a5efa914c56 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 10 Jan 2017 23:13:45 +0000
Subject: [PATCH 247/297] net: phy: marvell: fix Marvell 88E1512 used in SGMII
 mode

When an Marvell 88E1512 PHY is connected to a nic in SGMII mode, the
fiber page is used for the SGMII host-side connection.  The PHY driver
notices that SUPPORTED_FIBRE is set, so it tries reading the fiber page
for the link status, and ends up reading the MAC-side status instead of
the outgoing (copper) link.  This leads to incorrect results reported
via ethtool.

If the PHY is connected via SGMII to the host, ignore the fiber page.
However, continue to allow the existing power management code to
suspend and resume the fiber page.

Fixes: 6cfb3bcc0641 ("Marvell phy: check link status in case of fiber link.")
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e269262471a4..0b78210c0fa7 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1192,7 +1192,8 @@ static int marvell_read_status(struct phy_device *phydev)
 	int err;
 
 	/* Check the fiber mode first */
-	if (phydev->supported & SUPPORTED_FIBRE) {
+	if (phydev->supported & SUPPORTED_FIBRE &&
+	    phydev->interface != PHY_INTERFACE_MODE_SGMII) {
 		err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_FIBER);
 		if (err < 0)
 			goto error;

From 24c63bbc18e25d5d8439422aa5fd2d66390b88eb Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 10 Jan 2017 15:22:25 -0800
Subject: [PATCH 248/297] net: vrf: do not allow table id 0

Frank reported that vrf devices can be created with a table id of 0.
This breaks many of the run time table id checks and should not be
allowed. Detect this condition at create time and fail with EINVAL.

Fixes: 193125dbd8eb ("net: Introduce VRF device driver")
Reported-by: Frank Kellermann <frank.kellermann@atos.net>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 0a067708aa39..454f907d419a 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1252,6 +1252,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 		return -EINVAL;
 
 	vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
+	if (vrf->tb_id == RT_TABLE_UNSPEC)
+		return -EINVAL;
 
 	dev->priv_flags |= IFF_L3MDEV_MASTER;
 

From ad5013d5699d30ded0cdbbc68b93b2aa28222c6e Mon Sep 17 00:00:00 2001
From: Colin King <colin.king@canonical.com>
Date: Wed, 11 Jan 2017 11:43:10 +0000
Subject: [PATCH 249/297] perf/x86/intel: Use ULL constant to prevent undefined
 shift behaviour

When x86_pmu.num_counters is 32 the shift of the integer constant 1 is
exceeding 32bit and therefor undefined behaviour.

Fix this by shifting 1ULL instead of 1.

Reported-by: CoverityScan CID#1192105 ("Bad bit shift operation")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Link: http://lkml.kernel.org/r/20170111114310.17928-1-colin.king@canonical.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/events/intel/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 86138267b68a..d611cab214a6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3987,7 +3987,7 @@ __init int intel_pmu_init(void)
 		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
 		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
 	}
-	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
 
 	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",

From d6169d04097fd9ddf811e63eae4e5cd71e6666e2 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 11 Jan 2017 17:10:34 +0200
Subject: [PATCH 250/297] xhci: fix deadlock at host remove by running watchdog
 correctly

If a URB is killed while the host is removed we can end up in a situation
where the hub thread takes the roothub device lock, and waits for
the URB to be given back by xhci-hcd, blocking the host remove code.

xhci-hcd tries to stop the endpoint and give back the urb, but can't
as the host is removed from PCI bus at the same time, preventing the normal
way of giving back urb.

Instead we need to rely on the stop command timeout function to give back
the urb. This xhci_stop_endpoint_command_watchdog() timeout function
used a XHCI_STATE_DYING flag to indicate if the timeout function is already
running, but later this flag has been taking into use in other places to
mark that xhci is dying.

Remove checks for XHCI_STATE_DYING in xhci_urb_dequeue. We are still
checking that reading from pci state does not return 0xffffffff or that
host is not halted before trying to stop the endpoint.

This whole area of stopping endpoints, giving back URBs, and the wathdog
timeout need rework, this fix focuses on solving a specific deadlock
issue that we can then send to stable before any major rework.

Cc: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 11 -----------
 drivers/usb/host/xhci.c      | 13 -------------
 2 files changed, 24 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 25f522b09dd9..e32029a31ca4 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -913,17 +913,6 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
 	spin_lock_irqsave(&xhci->lock, flags);
 
 	ep->stop_cmds_pending--;
-	if (xhci->xhc_state & XHCI_STATE_REMOVING) {
-		spin_unlock_irqrestore(&xhci->lock, flags);
-		return;
-	}
-	if (xhci->xhc_state & XHCI_STATE_DYING) {
-		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-				"Stop EP timer ran, but another timer marked "
-				"xHCI as DYING, exiting.");
-		spin_unlock_irqrestore(&xhci->lock, flags);
-		return;
-	}
 	if (!(ep->stop_cmds_pending == 0 && (ep->ep_state & EP_HALT_PENDING))) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
 				"Stop EP timer ran, but no command pending, "
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0c8deb9ed42d..9a0ec116654a 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1534,19 +1534,6 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 		xhci_urb_free_priv(urb_priv);
 		return ret;
 	}
-	if ((xhci->xhc_state & XHCI_STATE_DYING) ||
-			(xhci->xhc_state & XHCI_STATE_HALTED)) {
-		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
-				"Ep 0x%x: URB %p to be canceled on "
-				"non-responsive xHCI host.",
-				urb->ep->desc.bEndpointAddress, urb);
-		/* Let the stop endpoint command watchdog timer (which set this
-		 * state) finish cleaning up the endpoint TD lists.  We must
-		 * have caught it in the middle of dropping a lock and giving
-		 * back an URB.
-		 */
-		goto done;
-	}
 
 	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
 	ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index];

From 1392370ee7de8aa3f69936f55bea6bfcc9879c59 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Jan 2017 14:29:02 +0300
Subject: [PATCH 251/297] nvme-rdma: fix nvme_rdma_queue_is_ready

Now that we don't abuse the cmd field in struct request for nvme command
passthrough this function needs to be converted to the proper accessor
as well.

Fixes: d49187e97e ("nvme: introduce struct nvme_request")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/nvme/host/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f587af345889..34e564857716 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1422,7 +1422,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
-		struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
+		struct nvme_command *cmd = nvme_req(rq)->cmd;
 
 		if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
 		    cmd->common.opcode != nvme_fabrics_command ||

From b5a10c5f7532b7473776da87e67f8301bbc32693 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Wed, 28 Dec 2016 22:13:15 -0200
Subject: [PATCH 252/297] nvme: apply DELAY_BEFORE_CHK_RDY quirk at probe time
 too

Commit 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter
readiness") introduced a quirk to adapters that cannot read the bit
NVME_CSTS_RDY right after register NVME_REG_CC is set; these adapters
need a delay or else the action of reading the bit NVME_CSTS_RDY could
somehow corrupt adapter's registers state and it never recovers.

When this quirk was added, we checked ctrl->tagset in order to avoid
quirking in probe time, supposing we would never require such delay
during probe. Well, it was too optimistic; we in fact need this quirk
at probe time in some cases, like after a kexec.

In some experiments, after abnormal shutdown of machine (aka power cord
unplug), we booted into our bootloader in Power, which is a Linux kernel,
and kexec'ed into another distro. If this kexec is too quick, we end up
reaching the probe of NVMe adapter in that distro when adapter is in
bad state (not fully initialized on our bootloader). What happens next
is that nvme_wait_ready() is unable to complete, except if the quirk is
enabled.

So, this patch removes the original ctrl->tagset verification in order
to enable the quirk even on probe time.

Fixes: 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter readiness")
Reported-by: Andrew Byrne <byrneadw@ie.ibm.com>
Reported-by: Jaime A. H. Gomez <jahgomez@mx1.ibm.com>
Reported-by: Zachary D. Myers <zdmyers@us.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Acked-by: Jeffrey Lien <Jeff.Lien@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2fc86dc7a8df..8a3c3e32a704 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1106,12 +1106,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
 	if (ret)
 		return ret;
 
-	/* Checking for ctrl->tagset is a trick to avoid sleeping on module
-	 * load, since we only need the quirk on reset_controller. Notice
-	 * that the HGST device needs this delay only in firmware activation
-	 * procedure; unfortunately we have no (easy) way to verify this.
-	 */
-	if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset)
+	if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
 		msleep(NVME_QUIRK_DELAY_AMOUNT);
 
 	return nvme_wait_ready(ctrl, cap, false);

From 0a417b8dc1f10b03e8f558b8a831f07ec4c23795 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 11 Jan 2017 10:20:04 -0800
Subject: [PATCH 253/297] xfs: Timely free truncated dirty pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 99579ccec4e2 "xfs: skip dirty pages in ->releasepage()" started
to skip dirty pages in xfs_vm_releasepage() which also has the effect
that if a dirty page is truncated, it does not get freed by
block_invalidatepage() and is lingering in LRU list waiting for reclaim.
So a simple loop like:

while true; do
	dd if=/dev/zero of=file bs=1M count=100
	rm file
done

will keep using more and more memory until we hit low watermarks and
start pagecache reclaim which will eventually reclaim also the truncate
pages. Keeping these truncated (and thus never usable) pages in memory
is just a waste of memory, is unnecessarily stressing page cache
reclaim, and reportedly also leads to anonymous mmap(2) returning ENOMEM
prematurely.

So instead of just skipping dirty pages in xfs_vm_releasepage(), return
to old behavior of skipping them only if they have delalloc or unwritten
buffers and fix the spurious warnings by warning only if the page is
clean.

CC: stable@vger.kernel.org
CC: Brian Foster <bfoster@redhat.com>
CC: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Petr Tůma <petr.tuma@d3s.mff.cuni.cz>
Fixes: 99579ccec4e271c3d4d4e7c946058766812afdab
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0f56fcd3a5d5..631e7c0e0a29 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1152,19 +1152,22 @@ xfs_vm_releasepage(
 	 * block_invalidatepage() can send pages that are still marked dirty
 	 * but otherwise have invalidated buffers.
 	 *
-	 * We've historically freed buffers on the latter. Instead, quietly
-	 * filter out all dirty pages to avoid spurious buffer state warnings.
-	 * This can likely be removed once shrink_active_list() is fixed.
+	 * We want to release the latter to avoid unnecessary buildup of the
+	 * LRU, skip the former and warn if we've left any lingering
+	 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
+	 * or unwritten buffers and warn if the page is not dirty. Otherwise
+	 * try to release the buffers.
 	 */
-	if (PageDirty(page))
-		return 0;
-
 	xfs_count_page_state(page, &delalloc, &unwritten);
 
-	if (WARN_ON_ONCE(delalloc))
+	if (delalloc) {
+		WARN_ON_ONCE(!PageDirty(page));
 		return 0;
-	if (WARN_ON_ONCE(unwritten))
+	}
+	if (unwritten) {
+		WARN_ON_ONCE(!PageDirty(page));
 		return 0;
+	}
 
 	return try_to_free_buffers(page);
 }

From 6ed0993a0b859ce62edf2930ded683e452286d39 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 7 Jan 2017 09:27:49 +0300
Subject: [PATCH 254/297] vfio-mdev: return -EFAULT if copy_to_user() fails

The copy_to_user() function returns the number of bytes which it wasn't
able to copy but we want to return a negative error code.

Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mtty.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 1fc57a5093a7..975af5bbf28d 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1180,7 +1180,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 
 		memcpy(&mdev_state->dev_info, &info, sizeof(info));
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
 	}
 	case VFIO_DEVICE_GET_REGION_INFO:
 	{
@@ -1201,7 +1204,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		if (ret)
 			return ret;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
 	}
 
 	case VFIO_DEVICE_GET_IRQ_INFO:
@@ -1224,7 +1230,10 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		if (info.count == -1)
 			return -EINVAL;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
 	}
 	case VFIO_DEVICE_SET_IRQS:
 	{

From 5c677869e0abbffbade2cfd82d46d0eebe823f34 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 7 Jan 2017 09:28:40 +0300
Subject: [PATCH 255/297] vfio-mdev: buffer overflow in ioctl()

This is a sample driver for documentation so the impact is probably
pretty low.  But we should check that bar_index is valid so we
don't write beyond the end of the mdev_state->region_info[] array.

Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mtty.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 975af5bbf28d..382f4797428f 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1073,7 +1073,7 @@ int mtty_get_region_info(struct mdev_device *mdev,
 {
 	unsigned int size = 0;
 	struct mdev_state *mdev_state;
-	int bar_index;
+	u32 bar_index;
 
 	if (!mdev)
 		return -EINVAL;
@@ -1082,8 +1082,11 @@ int mtty_get_region_info(struct mdev_device *mdev,
 	if (!mdev_state)
 		return -EINVAL;
 
-	mutex_lock(&mdev_state->ops_lock);
 	bar_index = region_info->index;
+	if (bar_index >= VFIO_PCI_NUM_REGIONS)
+		return -EINVAL;
+
+	mutex_lock(&mdev_state->ops_lock);
 
 	switch (bar_index) {
 	case VFIO_PCI_CONFIG_REGION_INDEX:

From 73da4268fdbae972f617946d1c690f2136964802 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 7 Jan 2017 09:30:08 +0300
Subject: [PATCH 256/297] vfio-mdev: remove some dead code

We set info.count to 1 in mtty_get_irq_info() so static checkers
complain that, "Why do we have impossible conditions?"  The answer is
that it seems to be left over dead code that can be safely removed.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mtty.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 382f4797428f..ca495686b9c3 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1230,9 +1230,6 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		if (ret)
 			return ret;
 
-		if (info.count == -1)
-			return -EINVAL;
-
 		if (copy_to_user((void __user *)arg, &info, minsz))
 			return -EFAULT;
 

From a89af4abdf9b353cdd6f61afc0eaaac403304873 Mon Sep 17 00:00:00 2001
From: Brendan McGrath <redmcg@redmandi.dyndns.org>
Date: Sat, 7 Jan 2017 08:01:38 +1100
Subject: [PATCH 257/297] HID: i2c-hid: Add sleep between POWER ON and RESET

Support for the Asus Touchpad was recently added. It turns out this
device can fail initialisation (and become unusable) when the RESET
command is sent too soon after the POWER ON command.

Unfortunately the i2c-hid specification does not specify the need for
a delay between these two commands. But it was discovered the Windows
driver has a 1ms delay.

As a result, this patch modifies the i2c-hid module to add a sleep
inbetween the POWER ON and RESET commands which lasts between 1ms and 5ms.

See https://github.com/vlasenko/hid-asus-dkms/issues/24 for further
details.

Signed-off-by: Brendan McGrath <redmcg@redmandi.dyndns.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/i2c-hid/i2c-hid.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 78fb32a7b103..ea3c3546cef7 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -426,6 +426,15 @@ static int i2c_hid_hwreset(struct i2c_client *client)
 	if (ret)
 		goto out_unlock;
 
+	/*
+	 * The HID over I2C specification states that if a DEVICE needs time
+	 * after the PWR_ON request, it should utilise CLOCK stretching.
+	 * However, it has been observered that the Windows driver provides a
+	 * 1ms sleep between the PWR_ON and RESET requests and that some devices
+	 * rely on this.
+	 */
+	usleep_range(1000, 5000);
+
 	i2c_hid_dbg(ihid, "resetting...\n");
 
 	ret = i2c_hid_command(client, &hid_reset_cmd, NULL, 0);

From 900742d89c1b4e04bd373aec8470b88e183f08ca Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:22 -0600
Subject: [PATCH 258/297] x86/unwind: Silence warnings for non-current tasks

There are a handful of callers to save_stack_trace_tsk() and
show_stack() which try to unwind the stack of a task other than current.
In such cases, it's remotely possible that the task is running on one
CPU while the unwinder is reading its stack from another CPU, causing
the unwinder to see stack corruption.

These cases seem to be mostly harmless.  The unwinder has checks which
prevent it from following bad pointers beyond the bounds of the stack.
So it's not really a bug as long as the caller understands that
unwinding another task will not always succeed.

Since stack "corruption" on another task's stack isn't necessarily a
bug, silence the warnings when unwinding tasks other than current.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/00d8c50eea3446c1524a2a755397a3966629354c.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/unwind_frame.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 4443e499f279..195eebf6da20 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -207,6 +207,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	return true;
 
 bad_address:
+	/*
+	 * When unwinding a non-current task, the task might actually be
+	 * running on another CPU, in which case it could be modifying its
+	 * stack while we're reading it.  This is generally not a problem and
+	 * can be ignored as long as the caller understands that unwinding
+	 * another task will not always succeed.
+	 */
+	if (state->task != current)
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",

From 84936118bdf37bda513d4a361c38181a216427e0 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:23 -0600
Subject: [PATCH 259/297] x86/unwind: Disable KASAN checks for non-current
 tasks

There are a handful of callers to save_stack_trace_tsk() and
show_stack() which try to unwind the stack of a task other than current.
In such cases, it's remotely possible that the task is running on one
CPU while the unwinder is reading its stack from another CPU, causing
the unwinder to see stack corruption.

These cases seem to be mostly harmless.  The unwinder has checks which
prevent it from following bad pointers beyond the bounds of the stack.
So it's not really a bug as long as the caller understands that
unwinding another task will not always succeed.

In such cases, it's possible that the unwinder may read a KASAN-poisoned
region of the stack.  Account for that by using READ_ONCE_NOCHECK() when
reading the stack of another task.

Use READ_ONCE() when reading the stack of the current task, since KASAN
warnings can still be useful for finding bugs in that case.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/4c575eb288ba9f73d498dfe0acde2f58674598f1.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/stacktrace.h |  5 ++++-
 arch/x86/kernel/unwind_frame.c    | 20 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a3269c897ec5..20ce3db20f24 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -52,13 +52,16 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 static inline unsigned long *
 get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
+	struct inactive_task_frame *frame;
+
 	if (regs)
 		return (unsigned long *)regs->bp;
 
 	if (task == current)
 		return __builtin_frame_address(0);
 
-	return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
+	frame = (struct inactive_task_frame *)task->thread.sp;
+	return (unsigned long *)READ_ONCE_NOCHECK(frame->bp);
 }
 #else
 static inline unsigned long *
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 195eebf6da20..23d15565d02a 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -6,6 +6,21 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x)			\
+({							\
+	unsigned long val;				\
+	if (task == current)				\
+		val = READ_ONCE(x);			\
+	else						\
+		val = READ_ONCE_NOCHECK(x);		\
+	val;						\
+})
+
 static void unwind_dump(struct unwind_state *state, unsigned long *sp)
 {
 	static bool dumped_before = false;
@@ -48,7 +63,8 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (state->regs && user_mode(state->regs))
 		return 0;
 
-	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+	addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
+	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr,
 				     addr_p);
 
 	return __kernel_text_address(addr) ? addr : 0;
@@ -162,7 +178,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->regs)
 		next_bp = (unsigned long *)state->regs->bp;
 	else
-		next_bp = (unsigned long *)*state->bp;
+		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp);
 
 	/* is the next frame pointer an encoded pointer to pt_regs? */
 	regs = decode_frame_pointer(next_bp);

From 2c96b2fe9c57b4267c3f0a680d82d7cc52e1c447 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:24 -0600
Subject: [PATCH 260/297] x86/unwind: Include __schedule() in stack traces

In the following commit:

  0100301bfdf5 ("sched/x86: Rewrite the switch_to() code")

... the layout of the 'inactive_task_frame' struct was designed to have
a frame pointer header embedded in it, so that the unwinder could use
the 'bp' and 'ret_addr' fields to report __schedule() on the stack (or
ret_from_fork() for newly forked tasks which haven't actually run yet).

Finish the job by changing get_frame_pointer() to return a pointer to
inactive_task_frame's 'bp' field rather than 'bp' itself.  This allows
the unwinder to start one frame higher on the stack, so that it properly
reports __schedule().

Reported-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/598e9f7505ed0aba86e8b9590aa528c6c7ae8dcd.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/stacktrace.h |  5 +----
 arch/x86/include/asm/switch_to.h  | 10 +++++++++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 20ce3db20f24..2e41c50ddf47 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -52,16 +52,13 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 static inline unsigned long *
 get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
-	struct inactive_task_frame *frame;
-
 	if (regs)
 		return (unsigned long *)regs->bp;
 
 	if (task == current)
 		return __builtin_frame_address(0);
 
-	frame = (struct inactive_task_frame *)task->thread.sp;
-	return (unsigned long *)READ_ONCE_NOCHECK(frame->bp);
+	return &((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
 static inline unsigned long *
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 5cb436acd463..fcc5cd387fd1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -36,7 +36,10 @@ static inline void prepare_switch_to(struct task_struct *prev,
 
 asmlinkage void ret_from_fork(void);
 
-/* data that is pointed to by thread.sp */
+/*
+ * This is the structure pointed to by thread.sp for an inactive task.  The
+ * order of the fields must match the code in __switch_to_asm().
+ */
 struct inactive_task_frame {
 #ifdef CONFIG_X86_64
 	unsigned long r15;
@@ -48,6 +51,11 @@ struct inactive_task_frame {
 	unsigned long di;
 #endif
 	unsigned long bx;
+
+	/*
+	 * These two fields must be together.  They form a stack frame header,
+	 * needed by get_frame_pointer().
+	 */
 	unsigned long bp;
 	unsigned long ret_addr;
 };

From ff3f7e2475bbf9201e95824e72698fcdc5c3d47a Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 9 Jan 2017 12:00:25 -0600
Subject: [PATCH 261/297] x86/entry: Fix the end of the stack for newly forked
 tasks

When unwinding a task, the end of the stack is always at the same offset
right below the saved pt_regs, regardless of which syscall was used to
enter the kernel.  That convention allows the unwinder to verify that a
stack is sane.

However, newly forked tasks don't always follow that convention, as
reported by the following unwinder warning seen by Dave Jones:

  WARNING: kernel stack frame pointer at ffffc90001443f30 in kworker/u8:8:30468 has bad value           (null)

The warning was due to the following call chain:

  (ftrace handler)
  call_usermodehelper_exec_async+0x5/0x140
  ret_from_fork+0x22/0x30

The problem is that ret_from_fork() doesn't create a stack frame before
calling other functions.  Fix that by carefully using the frame pointer
macros.

In addition to conforming to the end of stack convention, this also
makes related stack traces more sensible by making it clear to the user
that ret_from_fork() was involved.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/8854cdaab980e9700a81e9ebf0d4238e4bbb68ef.1483978430.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/entry_32.S | 30 +++++++++++-------------------
 arch/x86/entry/entry_64.S | 11 +++++++----
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 701d29f8e4d3..57f7ec35216e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -254,23 +254,6 @@ ENTRY(__switch_to_asm)
 	jmp	__switch_to
 END(__switch_to_asm)
 
-/*
- * The unwinder expects the last frame on the stack to always be at the same
- * offset from the end of the page, which allows it to validate the stack.
- * Calling schedule_tail() directly would break that convention because its an
- * asmlinkage function so its argument has to be pushed on the stack.  This
- * wrapper creates a proper "end of stack" frame header before the call.
- */
-ENTRY(schedule_tail_wrapper)
-	FRAME_BEGIN
-
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
-
-	FRAME_END
-	ret
-ENDPROC(schedule_tail_wrapper)
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -279,15 +262,24 @@ ENDPROC(schedule_tail_wrapper)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	call	schedule_tail_wrapper
+	FRAME_BEGIN		/* help unwinder find end of stack */
+
+	/*
+	 * schedule_tail() is asmlinkage so we have to put its 'prev' argument
+	 * on the stack.
+	 */
+	pushl	%eax
+	call	schedule_tail
+	popl	%eax
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
 
 2:
 	/* When we fork, we trace the syscall return in the child, too. */
-	movl    %esp, %eax
+	leal	FRAME_OFFSET(%esp), %eax
 	call    syscall_return_slowpath
+	FRAME_END
 	jmp     restore_all
 
 	/* kernel thread */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5b219707c2f2..044d18ebc43c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -408,17 +409,19 @@ END(__switch_to_asm)
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
+	FRAME_BEGIN			/* help unwinder find end of stack */
 	movq	%rax, %rdi
-	call	schedule_tail			/* rdi: 'prev' task parameter */
+	call	schedule_tail		/* rdi: 'prev' task parameter */
 
-	testq	%rbx, %rbx			/* from kernel_thread? */
-	jnz	1f				/* kernel threads are uncommon */
+	testq	%rbx, %rbx		/* from kernel_thread? */
+	jnz	1f			/* kernel threads are uncommon */
 
 2:
-	movq	%rsp, %rdi
+	leaq	FRAME_OFFSET(%rsp),%rdi	/* pt_regs pointer */
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
+	FRAME_END
 	jmp	restore_regs_and_iret
 
 1:

From b6416e61012429e0277bd15a229222fd17afc1c1 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 16 Dec 2016 14:30:35 -0800
Subject: [PATCH 262/297] jump_labels: API for flushing deferred jump label
 updates

Modules that use static_key_deferred need a way to synchronize with
any delayed work that is still pending when the module is unloaded.
Introduce static_key_deferred_flush() which flushes any pending
jump label updates.

Signed-off-by: David Matlack <dmatlack@google.com>
Cc: stable@vger.kernel.org
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/jump_label_ratelimit.h | 5 +++++
 kernel/jump_label.c                  | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
index 089f70f83e97..23da3af459fe 100644
--- a/include/linux/jump_label_ratelimit.h
+++ b/include/linux/jump_label_ratelimit.h
@@ -14,6 +14,7 @@ struct static_key_deferred {
 
 #ifdef HAVE_JUMP_LABEL
 extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
+extern void static_key_deferred_flush(struct static_key_deferred *key);
 extern void
 jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
 
@@ -26,6 +27,10 @@ static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
 	STATIC_KEY_CHECK_USE();
 	static_key_slow_dec(&key->key);
 }
+static inline void static_key_deferred_flush(struct static_key_deferred *key)
+{
+	STATIC_KEY_CHECK_USE();
+}
 static inline void
 jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 93ad6c1fb9b6..a9b8cf500591 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -182,6 +182,13 @@ void static_key_slow_dec_deferred(struct static_key_deferred *key)
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
 
+void static_key_deferred_flush(struct static_key_deferred *key)
+{
+	STATIC_KEY_CHECK_USE();
+	flush_delayed_work(&key->work);
+}
+EXPORT_SYMBOL_GPL(static_key_deferred_flush);
+
 void jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
 {

From cef84c302fe051744b983a92764d3fcca933415d Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 16 Dec 2016 14:30:36 -0800
Subject: [PATCH 263/297] KVM: x86: flush pending lapic jump label updates on
 module unload

KVM's lapic emulation uses static_key_deferred (apic_{hw,sw}_disabled).
These are implemented with delayed_work structs which can still be
pending when the KVM module is unloaded. We've seen this cause kernel
panics when the kvm_intel module is quickly reloaded.

Use the new static_key_deferred_flush() API to flush pending updates on
module unload.

Signed-off-by: David Matlack <dmatlack@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 6 ++++++
 arch/x86/kvm/lapic.h | 1 +
 arch/x86/kvm/x86.c   | 1 +
 3 files changed, 8 insertions(+)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5fe290c1b7d8..2f6ef5121a4c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2426,3 +2426,9 @@ void kvm_lapic_init(void)
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
 	jump_label_rate_limit(&apic_sw_disabled, HZ);
 }
+
+void kvm_lapic_exit(void)
+{
+	static_key_deferred_flush(&apic_hw_disabled);
+	static_key_deferred_flush(&apic_sw_disabled);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e0c80233b3e1..ff8039d61672 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -110,6 +110,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
 void kvm_lapic_init(void);
+void kvm_lapic_exit(void);
 
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2f22810a7e0c..a0ac6e0060fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6045,6 +6045,7 @@ out:
 
 void kvm_arch_exit(void)
 {
+	kvm_lapic_exit();
 	perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
 
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))

From 129a72a0d3c8e139a04512325384fe5ac119e74d Mon Sep 17 00:00:00 2001
From: Steve Rutherford <srutherford@google.com>
Date: Wed, 11 Jan 2017 18:28:29 -0800
Subject: [PATCH 264/297] KVM: x86: Introduce segmented_write_std

Introduces segemented_write_std.

Switches from emulated reads/writes to standard read/writes in fxsave,
fxrstor, sgdt, and sidt.  This fixes CVE-2017-2584, a longstanding
kernel memory leak.

Since commit 283c95d0e389 ("KVM: x86: emulate FXSAVE and FXRSTOR",
2016-11-09), which is luckily not yet in any final release, this would
also be an exploitable kernel memory *write*!

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: stable@vger.kernel.org
Fixes: 96051572c819194c37a8367624b285be10297eca
Fixes: 283c95d0e3891b64087706b344a4b545d04a6e62
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Steve Rutherford <srutherford@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56628a44668b..f36d0fa6b885 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -818,6 +818,20 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
 }
 
+static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
+			       struct segmented_address addr,
+			       void *data,
+			       unsigned int size)
+{
+	int rc;
+	ulong linear;
+
+	rc = linearize(ctxt, addr, size, true, &linear);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
 /*
  * Prefetch the remaining bytes of the instruction without crossing page
  * boundary if they are not in fetch_cache yet.
@@ -3685,8 +3699,8 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
 	}
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
-	return segmented_write(ctxt, ctxt->dst.addr.mem,
-			       &desc_ptr, 2 + ctxt->op_bytes);
+	return segmented_write_std(ctxt, ctxt->dst.addr.mem,
+				   &desc_ptr, 2 + ctxt->op_bytes);
 }
 
 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
@@ -3932,7 +3946,7 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
 	else
 		size = offsetof(struct fxregs_state, xmm_space[0]);
 
-	return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+	return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
 }
 
 static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
@@ -3974,7 +3988,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
+	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 

From 4f3dbdf47e150016aacd734e663347fcaa768303 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 5 Jan 2017 17:39:42 -0800
Subject: [PATCH 265/297] KVM: eventfd: fix NULL deref irqbypass consumer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported syzkaller:

    BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
    IP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
    PGD 0

    Oops: 0002 [#1] SMP
    CPU: 1 PID: 125 Comm: kworker/1:1 Not tainted 4.9.0+ #1
    Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm]
    task: ffff9bbe0dfbb900 task.stack: ffffb61802014000
    RIP: 0010:irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
    Call Trace:
     irqfd_shutdown+0x66/0xa0 [kvm]
     process_one_work+0x16b/0x480
     worker_thread+0x4b/0x500
     kthread+0x101/0x140
     ? process_one_work+0x480/0x480
     ? kthread_create_on_node+0x60/0x60
     ret_from_fork+0x25/0x30
    RIP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] RSP: ffffb61802017e20
    CR2: 0000000000000008

The syzkaller folks reported a NULL pointer dereference that due to
unregister an consumer which fails registration before. The syzkaller
creates two VMs w/ an equal eventfd occasionally. So the second VM
fails to register an irqbypass consumer. It will make irqfd as inactive
and queue an workqueue work to shutdown irqfd and unregister the irqbypass
consumer when eventfd is closed. However, the second consumer has been
initialized though it fails registration. So the token(same as the first
VM's) is taken to unregister the consumer through the workqueue, the
consumer of the first VM is found and unregistered, then NULL deref incurred
in the path of deleting consumer from the consumers list.

This patch fixes it by making irq_bypass_register/unregister_consumer()
looks for the consumer entry based on consumer pointer itself instead of
token matching.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/lib/irqbypass.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 52abac4bb6a2..6d2fcd6fcb25 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
 	mutex_lock(&lock);
 
 	list_for_each_entry(tmp, &consumers, node) {
-		if (tmp->token == consumer->token) {
+		if (tmp->token == consumer->token || tmp == consumer) {
 			mutex_unlock(&lock);
 			module_put(THIS_MODULE);
 			return -EBUSY;
@@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
 	mutex_lock(&lock);
 
 	list_for_each_entry(tmp, &consumers, node) {
-		if (tmp->token != consumer->token)
+		if (tmp != consumer)
 			continue;
 
 		list_for_each_entry(producer, &producers, node) {

From 546d87e5c903a7f3ee7b9f998949a94729fbc65b Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Tue, 3 Jan 2017 18:56:19 -0800
Subject: [PATCH 266/297] KVM: x86: fix NULL deref in vcpu_scan_ioapic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by syzkaller:

    BUG: unable to handle kernel NULL pointer dereference at 00000000000001b0
    IP: _raw_spin_lock+0xc/0x30
    PGD 3e28eb067
    PUD 3f0ac6067
    PMD 0
    Oops: 0002 [#1] SMP
    CPU: 0 PID: 2431 Comm: test Tainted: G           OE   4.10.0-rc1+ #3
    Call Trace:
     ? kvm_ioapic_scan_entry+0x3e/0x110 [kvm]
     kvm_arch_vcpu_ioctl_run+0x10a8/0x15f0 [kvm]
     ? pick_next_task_fair+0xe1/0x4e0
     ? kvm_arch_vcpu_load+0xea/0x260 [kvm]
     kvm_vcpu_ioctl+0x33a/0x600 [kvm]
     ? hrtimer_try_to_cancel+0x29/0x130
     ? do_nanosleep+0x97/0xf0
     do_vfs_ioctl+0xa1/0x5d0
     ? __hrtimer_init+0x90/0x90
     ? do_nanosleep+0x5b/0xf0
     SyS_ioctl+0x79/0x90
     do_syscall_64+0x6e/0x180
     entry_SYSCALL64_slow_path+0x25/0x25
    RIP: _raw_spin_lock+0xc/0x30 RSP: ffffa43688973cc0

The syzkaller folks reported a NULL pointer dereference due to
ENABLE_CAP succeeding even without an irqchip.  The Hyper-V
synthetic interrupt controller is activated, resulting in a
wrong request to rescan the ioapic and a NULL pointer dereference.

    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <sys/types.h>
    #include <linux/kvm.h>
    #include <pthread.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    #ifndef KVM_CAP_HYPERV_SYNIC
    #define KVM_CAP_HYPERV_SYNIC 123
    #endif

    void* thr(void* arg)
    {
	struct kvm_enable_cap cap;
	cap.flags = 0;
	cap.cap = KVM_CAP_HYPERV_SYNIC;
	ioctl((long)arg, KVM_ENABLE_CAP, &cap);
	return 0;
    }

    int main()
    {
	void *host_mem = mmap(0, 0x1000, PROT_READ|PROT_WRITE,
			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	int kvmfd = open("/dev/kvm", 0);
	int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
	struct kvm_userspace_memory_region memreg;
	memreg.slot = 0;
	memreg.flags = 0;
	memreg.guest_phys_addr = 0;
	memreg.memory_size = 0x1000;
	memreg.userspace_addr = (unsigned long)host_mem;
	host_mem[0] = 0xf4;
	ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
	int cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
	struct kvm_sregs sregs;
	ioctl(cpufd, KVM_GET_SREGS, &sregs);
	sregs.cr0 = 0;
	sregs.cr4 = 0;
	sregs.efer = 0;
	sregs.cs.selector = 0;
	sregs.cs.base = 0;
	ioctl(cpufd, KVM_SET_SREGS, &sregs);
	struct kvm_regs regs = { .rflags = 2 };
	ioctl(cpufd, KVM_SET_REGS, &regs);
	ioctl(vmfd, KVM_CREATE_IRQCHIP, 0);
	pthread_t th;
	pthread_create(&th, 0, thr, (void*)(long)cpufd);
	usleep(rand() % 10000);
	ioctl(cpufd, KVM_RUN, 0);
	pthread_join(th, 0);
	return 0;
    }

This patch fixes it by failing ENABLE_CAP if without an irqchip.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Fixes: 5c919412fe61 (kvm/x86: Hyper-V synthetic interrupt controller)
Cc: stable@vger.kernel.org # 4.5+
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0ac6e0060fb..57d8a856cdc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3342,6 +3342,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 	switch (cap->cap) {
 	case KVM_CAP_HYPERV_SYNIC:
+		if (!irqchip_in_kernel(vcpu->kvm))
+			return -EINVAL;
 		return kvm_hv_activate_synic(vcpu);
 	default:
 		return -EINVAL;

From 19c816e8e455f58da9997e4c6626f06203d8fbf0 Mon Sep 17 00:00:00 2001
From: Jike Song <jike.song@intel.com>
Date: Thu, 12 Jan 2017 16:52:02 +0800
Subject: [PATCH 267/297] capability: export has_capability

has_capability() is sometimes needed by modules to test capability
for specified task other than current, so export it.

Cc: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Jike Song <jike.song@intel.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 kernel/capability.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/capability.c b/kernel/capability.c
index a98e814f216f..f97fe77ceb88 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -318,6 +318,7 @@ bool has_capability(struct task_struct *t, int cap)
 {
 	return has_ns_capability(t, &init_user_ns, cap);
 }
+EXPORT_SYMBOL(has_capability);
 
 /**
  * has_ns_capability_noaudit - Does a task have a capability (unaudited)

From 33ab91103b3415e12457e3104f0e4517ce12d0f3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 12 Jan 2017 15:02:32 +0100
Subject: [PATCH 268/297] KVM: x86: fix emulation of "MOV SS, null selector"

This is CVE-2017-2583.  On Intel this causes a failed vmentry because
SS's type is neither 3 nor 7 (even though the manual says this check is
only done for usable SS, and the dmesg splat says that SS is unusable!).
On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb.

The fix fabricates a data segment descriptor when SS is set to a null
selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb.
Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3;
this in turn ensures CPL < 3 because RPL must be equal to CPL.

Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing
the bug and deciphering the manuals.

Reported-by: Xiaohan Zhang <zhangxiaohan1@huawei.com>
Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011
Cc: stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f36d0fa6b885..cedbba0f3402 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1585,7 +1585,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				    &ctxt->exception);
 }
 
-/* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				     u16 selector, int seg, u8 cpl,
 				     enum x86_transfer_type transfer,
@@ -1622,20 +1621,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 	rpl = selector & 3;
 
-	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
-	if ((seg == VCPU_SREG_CS
-	     || (seg == VCPU_SREG_SS
-		 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
-	     || seg == VCPU_SREG_TR)
-	    && null_selector)
-		goto exception;
-
 	/* TR should be in GDT only */
 	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
 		goto exception;
 
-	if (null_selector) /* for NULL selector skip all following checks */
+	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
+	if (null_selector) {
+		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
+			goto exception;
+
+		if (seg == VCPU_SREG_SS) {
+			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
+				goto exception;
+
+			/*
+			 * ctxt->ops->set_segment expects the CPL to be in
+			 * SS.DPL, so fake an expand-up 32-bit data segment.
+			 */
+			seg_desc.type = 3;
+			seg_desc.p = 1;
+			seg_desc.s = 1;
+			seg_desc.dpl = cpl;
+			seg_desc.d = 1;
+			seg_desc.g = 1;
+		}
+
+		/* Skip all following checks */
 		goto load;
+	}
 
 	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
 	if (ret != X86EMUL_CONTINUE)
@@ -1751,6 +1764,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/*
+	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
+	 * they can load it at CPL<3 (Intel's manual says only LSS can,
+	 * but it's wrong).
+	 *
+	 * However, the Intel manual says that putting IST=1/DPL=3 in
+	 * an interrupt gate will result in SS=3 (the AMD manual instead
+	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
+	 * and only forbid it here.
+	 */
+	if (seg == VCPU_SREG_SS && selector == 3 &&
+	    ctxt->mode == X86EMUL_MODE_PROT64)
+		return emulate_exception(ctxt, GP_VECTOR, 0, true);
+
 	return __load_segment_descriptor(ctxt, selector, seg, cpl,
 					 X86_TRANSFER_NONE, NULL);
 }

From f99e86485cc32cd16e5cc97f9bb0474f28608d84 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 12 Jan 2017 07:58:32 -0700
Subject: [PATCH 269/297] block: Rename blk_queue_zone_size and bdev_zone_size

All block device data fields and functions returning a number of 512B
sectors are by convention named xxx_sectors while names in the form
xxx_size are generally used for a number of bytes. The blk_queue_zone_size
and bdev_zone_size functions were not following this convention so rename
them.

No functional change is introduced by this patch.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>

Collapsed the two patches, they were nonsensically split and broke
bisection.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-zoned.c         |  4 ++--
 block/partition-generic.c | 14 +++++++-------
 fs/f2fs/segment.c         |  4 ++--
 fs/f2fs/super.c           |  6 +++---
 include/linux/blkdev.h    |  6 +++---
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 472211fa183a..3bd15d8095b1 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -16,7 +16,7 @@
 static inline sector_t blk_zone_start(struct request_queue *q,
 				      sector_t sector)
 {
-	sector_t zone_mask = blk_queue_zone_size(q) - 1;
+	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
 
 	return sector & ~zone_mask;
 }
@@ -222,7 +222,7 @@ int blkdev_reset_zones(struct block_device *bdev,
 		return -EINVAL;
 
 	/* Check alignment (handle eventual smaller last zone) */
-	zone_sectors = blk_queue_zone_size(q);
+	zone_sectors = blk_queue_zone_sectors(q);
 	if (sector & (zone_sectors - 1))
 		return -EINVAL;
 
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d7beb6bbbf66..7afb9907821f 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -434,7 +434,7 @@ static bool part_zone_aligned(struct gendisk *disk,
 			      struct block_device *bdev,
 			      sector_t from, sector_t size)
 {
-	unsigned int zone_size = bdev_zone_size(bdev);
+	unsigned int zone_sectors = bdev_zone_sectors(bdev);
 
 	/*
 	 * If this function is called, then the disk is a zoned block device
@@ -446,7 +446,7 @@ static bool part_zone_aligned(struct gendisk *disk,
 	 * regular block devices (no zone operation) and their zone size will
 	 * be reported as 0. Allow this case.
 	 */
-	if (!zone_size)
+	if (!zone_sectors)
 		return true;
 
 	/*
@@ -455,24 +455,24 @@ static bool part_zone_aligned(struct gendisk *disk,
 	 * use it. Check the zone size too: it should be a power of 2 number
 	 * of sectors.
 	 */
-	if (WARN_ON_ONCE(!is_power_of_2(zone_size))) {
+	if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
 		u32 rem;
 
-		div_u64_rem(from, zone_size, &rem);
+		div_u64_rem(from, zone_sectors, &rem);
 		if (rem)
 			return false;
 		if ((from + size) < get_capacity(disk)) {
-			div_u64_rem(size, zone_size, &rem);
+			div_u64_rem(size, zone_sectors, &rem);
 			if (rem)
 				return false;
 		}
 
 	} else {
 
-		if (from & (zone_size - 1))
+		if (from & (zone_sectors - 1))
 			return false;
 		if ((from + size) < get_capacity(disk) &&
-		    (size & (zone_size - 1)))
+		    (size & (zone_sectors - 1)))
 			return false;
 
 	}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0738f48293cc..0d8802453758 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -713,8 +713,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
 	}
 	sector = SECTOR_FROM_BLOCK(blkstart);
 
-	if (sector & (bdev_zone_size(bdev) - 1) ||
-				nr_sects != bdev_zone_size(bdev)) {
+	if (sector & (bdev_zone_sectors(bdev) - 1) ||
+	    nr_sects != bdev_zone_sectors(bdev)) {
 		f2fs_msg(sbi->sb, KERN_INFO,
 			"(%d) %s: Unaligned discard attempted (block %x + %x)",
 			devi, sbi->s_ndevs ? FDEV(devi).path: "",
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 702638e21c76..46fd30d8af77 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1553,16 +1553,16 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
 		return 0;
 
 	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
-				SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
+				SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
 		return -EINVAL;
-	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
 	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
 				__ilog2_u32(sbi->blocks_per_blkz))
 		return -EINVAL;
 	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
 	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
 					sbi->log_blocks_per_blkz;
-	if (nr_sectors & (bdev_zone_size(bdev) - 1))
+	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
 		FDEV(devi).nr_blkz++;
 
 	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 83695641bd5e..ff3d774f2751 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -739,7 +739,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 	}
 }
 
-static inline unsigned int blk_queue_zone_size(struct request_queue *q)
+static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 {
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
@@ -1536,12 +1536,12 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
 	return false;
 }
 
-static inline unsigned int bdev_zone_size(struct block_device *bdev)
+static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 
 	if (q)
-		return blk_queue_zone_size(q);
+		return blk_queue_zone_sectors(q);
 
 	return 0;
 }

From 41c066f2c4d436c535616fe182331766c57838f0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 11 Jan 2017 14:54:53 +0000
Subject: [PATCH 270/297] arm64: assembler: make adr_l work in modules under
 KASLR

When CONFIG_RANDOMIZE_MODULE_REGION_FULL=y, the offset between loaded
modules and the core kernel may exceed 4 GB, putting symbols exported
by the core kernel out of the reach of the ordinary adrp/add instruction
pairs used to generate relative symbol references. So make the adr_l
macro emit a movz/movk sequence instead when executing in module context.

While at it, remove the pointless special case for the stack pointer.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/assembler.h | 36 ++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 446f6c46d4b1..3a4301163e04 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -164,22 +164,25 @@ lr	.req	x30		// link register
 
 /*
  * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
- * <symbol> is within the range +/- 4 GB of the PC.
+ * <symbol> is within the range +/- 4 GB of the PC when running
+ * in core kernel context. In module context, a movz/movk sequence
+ * is used, since modules may be loaded far away from the kernel
+ * when KASLR is in effect.
  */
 	/*
 	 * @dst: destination register (64 bit wide)
 	 * @sym: name of the symbol
-	 * @tmp: optional scratch register to be used if <dst> == sp, which
-	 *       is not allowed in an adrp instruction
 	 */
-	.macro	adr_l, dst, sym, tmp=
-	.ifb	\tmp
+	.macro	adr_l, dst, sym
+#ifndef MODULE
 	adrp	\dst, \sym
 	add	\dst, \dst, :lo12:\sym
-	.else
-	adrp	\tmp, \sym
-	add	\dst, \tmp, :lo12:\sym
-	.endif
+#else
+	movz	\dst, #:abs_g3:\sym
+	movk	\dst, #:abs_g2_nc:\sym
+	movk	\dst, #:abs_g1_nc:\sym
+	movk	\dst, #:abs_g0_nc:\sym
+#endif
 	.endm
 
 	/*
@@ -190,6 +193,7 @@ lr	.req	x30		// link register
 	 *       the address
 	 */
 	.macro	ldr_l, dst, sym, tmp=
+#ifndef MODULE
 	.ifb	\tmp
 	adrp	\dst, \sym
 	ldr	\dst, [\dst, :lo12:\sym]
@@ -197,6 +201,15 @@ lr	.req	x30		// link register
 	adrp	\tmp, \sym
 	ldr	\dst, [\tmp, :lo12:\sym]
 	.endif
+#else
+	.ifb	\tmp
+	adr_l	\dst, \sym
+	ldr	\dst, [\dst]
+	.else
+	adr_l	\tmp, \sym
+	ldr	\dst, [\tmp]
+	.endif
+#endif
 	.endm
 
 	/*
@@ -206,8 +219,13 @@ lr	.req	x30		// link register
 	 *       while <src> needs to be preserved.
 	 */
 	.macro	str_l, src, sym, tmp
+#ifndef MODULE
 	adrp	\tmp, \sym
 	str	\src, [\tmp, :lo12:\sym]
+#else
+	adr_l	\tmp, \sym
+	str	\src, [\tmp]
+#endif
 	.endm
 
 	/*

From cc8e8342930129aa2c9b629e1653e4681f0896ea Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Wed, 4 Jan 2017 16:21:58 +0800
Subject: [PATCH 271/297] ceph: fix mds cluster availability check

We should apply the check after getting the initial mdsmap.

Fixes: e9e427f0a14f ("ceph: check availability of mds cluster on mount")
Link: http://tracker.ceph.com/issues/18161
Signed-off-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/mds_client.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4f49253387a0..ec6b35e9f966 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2106,6 +2106,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
 			dout("do_request mdsmap err %d\n", err);
 			goto finish;
 		}
+		if (mdsc->mdsmap->m_epoch == 0) {
+			dout("do_request no mdsmap, waiting for map\n");
+			list_add(&req->r_wait, &mdsc->waiting_for_map);
+			goto finish;
+		}
 		if (!(mdsc->fsc->mount_options->flags &
 		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
 		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {

From 84fcc2d2bd6cbf621e49e1d0f7eaef2e3c666b40 Mon Sep 17 00:00:00 2001
From: "Geng, Jichao" <geng.jichao@h3c.com>
Date: Thu, 5 Jan 2017 16:50:39 +0800
Subject: [PATCH 272/297] ceph: fix get_oldest_context()

For no snapshot case, we should use ci->truncate_{seq,size}.

Fixes: 5f743e456606 ("ceph: record truncate size/seq for snap data writeback")
Signed-off-by: Geng, Jichao <geng.jichao@h3c.com>
Signed-off-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/addr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9cd0c0ea7cdb..e4b066cd912a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -502,9 +502,9 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
 		dout(" head snapc %p has %d dirty pages\n",
 		     snapc, ci->i_wrbuffer_ref_head);
 		if (truncate_size)
-			*truncate_size = capsnap->truncate_size;
+			*truncate_size = ci->i_truncate_size;
 		if (truncate_seq)
-			*truncate_seq = capsnap->truncate_seq;
+			*truncate_seq = ci->i_truncate_seq;
 	}
 	spin_unlock(&ci->i_ceph_lock);
 	return snapc;

From 30f939feaeee23e21391cfc7b484f012eb189c3c Mon Sep 17 00:00:00 2001
From: Vlad Tsyrklevich <vlad@tsyrklevich.net>
Date: Mon, 9 Jan 2017 22:53:36 +0700
Subject: [PATCH 273/297] i2c: fix kernel memory disclosure in dev interface

i2c_smbus_xfer() does not always fill an entire block, allowing
kernel stack memory disclosure through the temp variable. Clear
it before it's read to.

Signed-off-by: Vlad Tsyrklevich <vlad@tsyrklevich.net>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@kernel.org
---
 drivers/i2c/i2c-dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 66f323fd3982..6f638bbc922d 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -331,7 +331,7 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client,
 		unsigned long arg)
 {
 	struct i2c_smbus_ioctl_data data_arg;
-	union i2c_smbus_data temp;
+	union i2c_smbus_data temp = {};
 	int datasize, res;
 
 	if (copy_from_user(&data_arg,

From 331c34255293cd02d395b7097008b509ba89e60e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 4 Jan 2017 20:57:22 -0800
Subject: [PATCH 274/297] i2c: do not enable fall back to Host Notify by
 default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Falling back unconditionally to HostNotify as primary client's interrupt
breaks some drivers which alter their functionality depending on whether
interrupt is present or not, so let's introduce a board flag telling I2C
core explicitly if we want wired interrupt or HostNotify-based one:
I2C_CLIENT_HOST_NOTIFY.

For DT-based systems we introduce "host-notify" property that we convert
to I2C_CLIENT_HOST_NOTIFY board flag.

Tested-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c.txt |  8 ++++++++
 drivers/i2c/i2c-core.c                        | 17 ++++++++---------
 include/linux/i2c.h                           |  1 +
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt
index 5fa691e6f638..cee9d5055fa2 100644
--- a/Documentation/devicetree/bindings/i2c/i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c.txt
@@ -62,6 +62,9 @@ wants to support one of the below features, it should adapt the bindings below.
 	"irq" and "wakeup" names are recognized by I2C core, other names are
 	left to individual drivers.
 
+- host-notify
+	device uses SMBus host notify protocol instead of interrupt line.
+
 - multi-master
 	states that there is another master active on this bus. The OS can use
 	this information to adapt power management to keep the arbitration awake
@@ -81,6 +84,11 @@ Binding may contain optional "interrupts" property, describing interrupts
 used by the device. I2C core will assign "irq" interrupt (or the very first
 interrupt if not using interrupt names) as primary interrupt for the slave.
 
+Alternatively, devices supporting SMbus Host Notify, and connected to
+adapters that support this feature, may use "host-notify" property. I2C
+core will create a virtual interrupt for Host Notify and assign it as
+primary interrupt for the slave.
+
 Also, if device is marked as a wakeup source, I2C core will set up "wakeup"
 interrupt for the device. If "wakeup" interrupt name is not present in the
 binding, then primary interrupt will be used as wakeup interrupt.
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index cf9e396d7702..7b117240f1ea 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -931,7 +931,10 @@ static int i2c_device_probe(struct device *dev)
 	if (!client->irq) {
 		int irq = -ENOENT;
 
-		if (dev->of_node) {
+		if (client->flags & I2C_CLIENT_HOST_NOTIFY) {
+			dev_dbg(dev, "Using Host Notify IRQ\n");
+			irq = i2c_smbus_host_notify_to_irq(client);
+		} else if (dev->of_node) {
 			irq = of_irq_get_byname(dev->of_node, "irq");
 			if (irq == -EINVAL || irq == -ENODATA)
 				irq = of_irq_get(dev->of_node, 0);
@@ -940,14 +943,7 @@ static int i2c_device_probe(struct device *dev)
 		}
 		if (irq == -EPROBE_DEFER)
 			return irq;
-		/*
-		 * ACPI and OF did not find any useful IRQ, try to see
-		 * if Host Notify can be used.
-		 */
-		if (irq < 0) {
-			dev_dbg(dev, "Using Host Notify IRQ\n");
-			irq = i2c_smbus_host_notify_to_irq(client);
-		}
+
 		if (irq < 0)
 			irq = 0;
 
@@ -1716,6 +1712,9 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 	info.of_node = of_node_get(node);
 	info.archdata = &dev_ad;
 
+	if (of_property_read_bool(node, "host-notify"))
+		info.flags |= I2C_CLIENT_HOST_NOTIFY;
+
 	if (of_get_property(node, "wakeup-source", NULL))
 		info.flags |= I2C_CLIENT_WAKE;
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b2109c522dec..4b45ec46161f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -665,6 +665,7 @@ i2c_unlock_adapter(struct i2c_adapter *adapter)
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
 					/* Must equal I2C_M_TEN below */
 #define I2C_CLIENT_SLAVE	0x20	/* we are the slave */
+#define I2C_CLIENT_HOST_NOTIFY	0x40	/* We want to use I2C host notify */
 #define I2C_CLIENT_WAKE		0x80	/* for board_info; true iff can wake */
 #define I2C_CLIENT_SCCB		0x9000	/* Use Omnivision SCCB protocol */
 					/* Must match I2C_M_STOP|IGNORE_NAK */

From 6f724fb3039522486fce2e32e4c0fbe238a6ab02 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 6 Jan 2017 19:02:57 +0800
Subject: [PATCH 275/297] i2c: print correct device invalid address

In of_i2c_register_device(), when the check for
device address validity fails we print the info.addr,
which has not been assigned properly.

Fix this by printing the actual invalid address.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: b4e2f6ac1281 ("i2c: apply DT flags when probing")
Cc: stable@kernel.org
---
 drivers/i2c/i2c-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 7b117240f1ea..c26296c2eac5 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1704,7 +1704,7 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 
 	if (i2c_check_addr_validity(addr, info.flags)) {
 		dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n",
-			info.addr, node->full_name);
+			addr, node->full_name);
 		return ERR_PTR(-EINVAL);
 	}
 

From 2659161dd40dbb599a19f320164373093df44a89 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 29 Dec 2016 22:27:33 +0000
Subject: [PATCH 276/297] i2c: fix spelling mistake: "insufficent" ->
 "insufficient"

Trivial fix to spelling mistake in WARN message, insufficient has
an insufficient number of i's in the spelling.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index c26296c2eac5..583e95042a21 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -3632,7 +3632,7 @@ int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
 	int ret;
 
 	if (!client || !slave_cb) {
-		WARN(1, "insufficent data\n");
+		WARN(1, "insufficient data\n");
 		return -EINVAL;
 	}
 

From 701dc207bf551d9fe6defa36e84a911e880398c3 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Wed, 11 Jan 2017 10:11:44 +0100
Subject: [PATCH 277/297] i2c: piix4: Avoid race conditions with IMC

On AMD's SB800 and upwards, the SMBus is shared with the Integrated
Micro Controller (IMC).

The platform provides a hardware semaphore to avoid race conditions
among them. (Check page 288 of the SB800-Series Southbridges Register
Reference Guide http://support.amd.com/TechDocs/45482.pdf)

Without this patch, many access to the SMBus end with an invalid
transaction or even with the bus stalled.

Reported-by: Alexandre Desnoyers <alex@qtec.com>
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>:
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-piix4.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index c2268cdf38e8..e34d82e79b98 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -585,10 +585,29 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 		 u8 command, int size, union i2c_smbus_data *data)
 {
 	struct i2c_piix4_adapdata *adapdata = i2c_get_adapdata(adap);
+	unsigned short piix4_smba = adapdata->smba;
+	int retries = MAX_TIMEOUT;
+	int smbslvcnt;
 	u8 smba_en_lo;
 	u8 port;
 	int retval;
 
+	/* Request the SMBUS semaphore, avoid conflicts with the IMC */
+	smbslvcnt  = inb_p(SMBSLVCNT);
+	do {
+		outb_p(smbslvcnt | 0x10, SMBSLVCNT);
+
+		/* Check the semaphore status */
+		smbslvcnt  = inb_p(SMBSLVCNT);
+		if (smbslvcnt & 0x10)
+			break;
+
+		usleep_range(1000, 2000);
+	} while (--retries);
+	/* SMBus is still owned by the IMC, we give up */
+	if (!retries)
+		return -EBUSY;
+
 	mutex_lock(&piix4_mutex_sb800);
 
 	outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX);
@@ -606,6 +625,9 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
 	mutex_unlock(&piix4_mutex_sb800);
 
+	/* Release the semaphore */
+	outb_p(smbslvcnt | 0x20, SMBSLVCNT);
+
 	return retval;
 }
 

From d1b333d12cde9cabe898160b6be9769d3382d81c Mon Sep 17 00:00:00 2001
From: Jike Song <jike.song@intel.com>
Date: Thu, 12 Jan 2017 16:52:03 +0800
Subject: [PATCH 278/297] vfio iommu type1: fix the testing of capability for
 remote task

Before the mdev enhancement type1 iommu used capable() to test the
capability of current task; in the course of mdev development a
new requirement, testing for another task other than current, was
raised.  ns_capable() was used for this purpose, however it still
tests current, the only difference is, in a specified namespace.

Fix it by using has_capability() instead, which tests the cap for
specified task in init_user_ns, the same namespace as capable().

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Jike Song <jike.song@intel.com>
Reviewed-by: James Morris <james.l.morris@oracle.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9266271a787a..77373e51b283 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -495,8 +495,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 				  unsigned long *pfn_base, bool do_accounting)
 {
 	unsigned long limit;
-	bool lock_cap = ns_capable(task_active_pid_ns(dma->task)->user_ns,
-				   CAP_IPC_LOCK);
+	bool lock_cap = has_capability(dma->task, CAP_IPC_LOCK);
 	struct mm_struct *mm;
 	int ret;
 	bool rsvd;

From 3139dc8ded6f27552a248d23fe9f086e3027fa12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 11 Jan 2017 15:39:31 +0100
Subject: [PATCH 279/297] dmaengine: rcar-dmac: unmap slave resource when
 channel is freed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The slave mapping should be removed together with other channel
resources when the channel is freed. If it's not unmapped it will hang
around forever after the channel is freed.

Fixes: 9f878603dbdb7db3 ("dmaengine: rcar-dmac: add iommu support for slave transfers")
Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/sh/rcar-dmac.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 2e441d0ccd79..4c357d475465 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -986,6 +986,7 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
 {
 	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
 	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	struct rcar_dmac_chan_map *map = &rchan->map;
 	struct rcar_dmac_desc_page *page, *_page;
 	struct rcar_dmac_desc *desc;
 	LIST_HEAD(list);
@@ -1019,6 +1020,13 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
 		free_page((unsigned long)page);
 	}
 
+	/* Remove slave mapping if present. */
+	if (map->slave.xfer_size) {
+		dma_unmap_resource(chan->device->dev, map->addr,
+				   map->slave.xfer_size, map->dir, 0);
+		map->slave.xfer_size = 0;
+	}
+
 	pm_runtime_put(chan->device->dev);
 }
 

From 94a6fa899d2cb5ee76933406df32996576a562e4 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 12 Jan 2017 08:24:16 -0700
Subject: [PATCH 280/297] vfio/type1: Remove pid_namespace.h include

Using has_capability() rather than ns_capable(), we're no longer using
this header.

Cc: Jike Song <jike.song@intel.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 77373e51b283..b3cc33fa6d26 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -36,7 +36,6 @@
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <linux/workqueue.h>
-#include <linux/pid_namespace.h>
 #include <linux/mdev.h>
 #include <linux/notifier.h>
 

From 2e3258ecfaebace1ceffaa14e0ea94775d54f46f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:10 +0100
Subject: [PATCH 281/297] block: add blk_rq_payload_bytes

Add a helper to calculate the actual data transfer size for special
payload requests.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ff3d774f2751..1ca8e8fd1078 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1000,6 +1000,19 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
+/*
+ * Some commands like WRITE SAME have a payload or data transfer size which
+ * is different from the size of the request.  Any driver that supports such
+ * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
+ * calculate the data transfer size.
+ */
+static inline unsigned int blk_rq_payload_bytes(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return rq->special_vec.bv_len;
+	return blk_rq_bytes(rq);
+}
+
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 						     int op)
 {

From fd102b125e174edbea34e6e7a2d371bc7901c53d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:11 +0100
Subject: [PATCH 282/297] scsi: use blk_rq_payload_bytes

Without that we'll pass a wrong payload size in cmd->sdb, which
can lead to hangs with drivers that need the total transfer size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Chris Valean <v-chvale@microsoft.com>
Reported-by: Dexuan Cui <decui@microsoft.com>
Fixes: f9d03f96 ("block: improve handling of the magic discard payload")
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/scsi/scsi_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c35b6de4ca64..ad4ff8fcd4dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1018,7 +1018,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
 	count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
 	BUG_ON(count > sdb->table.nents);
 	sdb->table.nents = count;
-	sdb->length = blk_rq_bytes(req);
+	sdb->length = blk_rq_payload_bytes(req);
 	return BLKPREP_OK;
 }
 

From b131c61d62266eb21b0f125f63f3d07e5670d726 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:12 +0100
Subject: [PATCH 283/297] nvme: use blk_rq_payload_bytes

The new blk_rq_payload_bytes generalizes the payload length hacks
that nvme_map_len did before.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/fc.c   |  5 ++---
 drivers/nvme/host/nvme.h |  8 --------
 drivers/nvme/host/pci.c  | 19 ++++++++-----------
 drivers/nvme/host/rdma.c | 13 +++++--------
 4 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index aa0bc60810a7..fcc9dcfdf675 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1654,13 +1654,12 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
 		struct nvme_fc_fcp_op *op)
 {
 	struct nvmefc_fcp_req *freq = &op->fcp_req;
-	u32 map_len = nvme_map_len(rq);
 	enum dma_data_direction dir;
 	int ret;
 
 	freq->sg_cnt = 0;
 
-	if (!map_len)
+	if (!blk_rq_payload_bytes(rq))
 		return 0;
 
 	freq->sg_table.sgl = freq->first_sgl;
@@ -1854,7 +1853,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ret)
 		return ret;
 
-	data_len = nvme_map_len(rq);
+	data_len = blk_rq_payload_bytes(rq);
 	if (data_len)
 		io_dir = ((rq_data_dir(rq) == WRITE) ?
 					NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 6377e14586dc..aead6d08ed2c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -225,14 +225,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
-static inline unsigned nvme_map_len(struct request *rq)
-{
-	if (req_op(rq) == REQ_OP_DISCARD)
-		return sizeof(struct nvme_dsm_range);
-	else
-		return blk_rq_bytes(rq);
-}
-
 static inline void nvme_cleanup_cmd(struct request *req)
 {
 	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 19beeb7b2ac2..3faefabf339c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -306,11 +306,11 @@ static __le64 **iod_list(struct request *req)
 	return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
-static int nvme_init_iod(struct request *rq, unsigned size,
-		struct nvme_dev *dev)
+static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
 	int nseg = blk_rq_nr_phys_segments(rq);
+	unsigned int size = blk_rq_payload_bytes(rq);
 
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@@ -420,12 +420,11 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 }
 #endif
 
-static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
-		int total_len)
+static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct dma_pool *pool;
-	int length = total_len;
+	int length = blk_rq_payload_bytes(req);
 	struct scatterlist *sg = iod->sg;
 	int dma_len = sg_dma_len(sg);
 	u64 dma_addr = sg_dma_address(sg);
@@ -501,7 +500,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
 }
 
 static int nvme_map_data(struct nvme_dev *dev, struct request *req,
-		unsigned size, struct nvme_command *cmnd)
+		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct request_queue *q = req->q;
@@ -519,7 +518,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 				DMA_ATTR_NO_WARN))
 		goto out;
 
-	if (!nvme_setup_prps(dev, req, size))
+	if (!nvme_setup_prps(dev, req))
 		goto out_unmap;
 
 	ret = BLK_MQ_RQ_QUEUE_ERROR;
@@ -580,7 +579,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_command cmnd;
-	unsigned map_len;
 	int ret = BLK_MQ_RQ_QUEUE_OK;
 
 	/*
@@ -600,13 +598,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
 		return ret;
 
-	map_len = nvme_map_len(req);
-	ret = nvme_init_iod(req, map_len, dev);
+	ret = nvme_init_iod(req, dev);
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
 		goto out_free_cmd;
 
 	if (blk_rq_nr_phys_segments(req))
-		ret = nvme_map_data(dev, req, map_len, &cmnd);
+		ret = nvme_map_data(dev, req, &cmnd);
 
 	if (ret != BLK_MQ_RQ_QUEUE_OK)
 		goto out_cleanup_iod;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 34e564857716..557f29b1f1bb 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -981,8 +981,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 }
 
 static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
-		struct request *rq, unsigned int map_len,
-		struct nvme_command *c)
+		struct request *rq, struct nvme_command *c)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_device *dev = queue->device;
@@ -1014,9 +1013,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	}
 
 	if (count == 1) {
-		if (rq_data_dir(rq) == WRITE &&
-		    map_len <= nvme_rdma_inline_data_size(queue) &&
-		    nvme_rdma_queue_idx(queue))
+		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+		    blk_rq_payload_bytes(rq) <=
+				nvme_rdma_inline_data_size(queue))
 			return nvme_rdma_map_sg_inline(queue, req, c);
 
 		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -1444,7 +1443,6 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *c = sqe->data;
 	bool flush = false;
 	struct ib_device *dev;
-	unsigned int map_len;
 	int ret;
 
 	WARN_ON_ONCE(rq->tag < 0);
@@ -1462,8 +1460,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 
-	map_len = nvme_map_len(rq);
-	ret = nvme_rdma_map_data(queue, rq, map_len, c);
+	ret = nvme_rdma_map_data(queue, rq, c);
 	if (ret < 0) {
 		dev_err(queue->ctrl->ctrl.device,
 			     "Failed to map data (%d)\n", ret);

From f80de881d8df967488b7343381619efa15019493 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:13 +0100
Subject: [PATCH 284/297] sd: remove __data_len hack for WRITE SAME

Now that we have the blk_rq_payload_bytes helper available to determine
the actual I/O size we don't need to mess around with __data_len for
WRITE SAME.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/scsi/sd.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b1933041da39..1fbb1ecf49f2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -836,7 +836,6 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 	struct bio *bio = rq->bio;
 	sector_t sector = blk_rq_pos(rq);
 	unsigned int nr_sectors = blk_rq_sectors(rq);
-	unsigned int nr_bytes = blk_rq_bytes(rq);
 	int ret;
 
 	if (sdkp->device->no_write_same)
@@ -869,21 +868,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 
 	cmd->transfersize = sdp->sector_size;
 	cmd->allowed = SD_MAX_RETRIES;
-
-	/*
-	 * For WRITE_SAME the data transferred in the DATA IN buffer is
-	 * different from the amount of data actually written to the target.
-	 *
-	 * We set up __data_len to the amount of data transferred from the
-	 * DATA IN buffer so that blk_rq_map_sg set up the proper S/G list
-	 * to transfer a single sector of data first, but then reset it to
-	 * the amount of data to be written right after so that the I/O path
-	 * knows how much to actually write.
-	 */
-	rq->__data_len = sdp->sector_size;
-	ret = scsi_init_io(cmd);
-	rq->__data_len = nr_bytes;
-	return ret;
+	return scsi_init_io(cmd);
 }
 
 static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd)

From bef13315e990fd3d3fb4c39013aefd53f06c3657 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 15:18:16 -0700
Subject: [PATCH 285/297] block: don't try to discard from
 __blkdev_issue_zeroout

Discard can return -EIO asynchronously if the alignment for the request
isn't suitable for the driver, which makes a proper fallback to other
methods in __blkdev_issue_zeroout impossible.  Thus only issue a sync
discard from blkdev_issue_zeroout an don't try discard at all from
__blkdev_issue_zeroout as a non-invasive workaround.

One more reason why abusing discard for zeroing must die..

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Eryu Guan <eguan@redhat.com>
Fixes: e73c23ff ("block: add async variant of blkdev_issue_zeroout")
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-lib.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index ed89c8f4b2a0..f8c82a9b4012 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -301,13 +301,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	if ((sector | nr_sects) & bs_mask)
 		return -EINVAL;
 
-	if (discard) {
-		ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
-				BLKDEV_DISCARD_ZERO, biop);
-		if (ret == 0 || (ret && ret != -EOPNOTSUPP))
-			goto out;
-	}
-
 	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
 			biop);
 	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
@@ -370,6 +363,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	struct bio *bio = NULL;
 	struct blk_plug plug;
 
+	if (discard) {
+		if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
+				BLKDEV_DISCARD_ZERO))
+			return 0;
+	}
+
 	blk_start_plug(&plug);
 	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
 			&bio, discard);

From 695085b4bc7603551db0b3da897b8bf9893ca218 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 13 Jan 2017 01:11:18 -0500
Subject: [PATCH 286/297] x86/tsc: Add the Intel Denverton Processor to
 native_calibrate_tsc()

The Intel Denverton microserver uses a 25 MHz TSC crystal,
so we can derive its exact [*] TSC frequency
using CPUID and some arithmetic, eg.:

  TSC: 1800 MHz (25000000 Hz * 216 / 3 / 1000000)

[*] 'exact' is only as good as the crystal, which should be +/- 20ppm

Signed-off-by: Len Brown <len.brown@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/306899f94804aece6d8fa8b4223ede3b48dbb59c.1484287748.git.len.brown@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/tsc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index be3a49ee0356..e41af597aed8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void)
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
 		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_ATOM_DENVERTON:
 			crystal_khz = 25000;	/* 25.0 MHz */
 			break;
 		case INTEL_FAM6_ATOM_GOLDMONT:

From 453828625731d0ba7218242ef6ec88f59408f368 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Thu, 12 Jan 2017 16:53:11 +0100
Subject: [PATCH 287/297] x86/mpx: Use compatible types in comparison to fix
 sparse error

info->si_addr is of type void __user *, so it should be compared against
something from the same address space.

This fixes the following sparse error:

  arch/x86/mm/mpx.c:296:27: error: incompatible types in comparison expression (different address spaces)

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/mpx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 324e5713d386..af59f808742f 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 	 * We were not able to extract an address from the instruction,
 	 * probably because there was something invalid in it.
 	 */
-	if (info->si_addr == (void *)-1) {
+	if (info->si_addr == (void __user *)-1) {
 		err = -EINVAL;
 		goto err_out;
 	}

From 63cae12bce9861cec309798d34701cf3da20bc71 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 9 Dec 2016 14:59:00 +0100
Subject: [PATCH 288/297] perf/core: Fix sys_perf_event_open() vs. hotplug

There is problem with installing an event in a task that is 'stuck' on
an offline CPU.

Blocked tasks are not dis-assosciated from offlined CPUs, after all, a
blocked task doesn't run and doesn't require a CPU etc.. Only on
wakeup do we ammend the situation and place the task on a available
CPU.

If we hit such a task with perf_install_in_context() we'll loop until
either that task wakes up or the CPU comes back online, if the task
waking depends on the event being installed, we're stuck.

While looking into this issue, I also spotted another problem, if we
hit a task with perf_install_in_context() that is in the middle of
being migrated, that is we observe the old CPU before sending the IPI,
but run the IPI (on the old CPU) while the task is already running on
the new CPU, things also go sideways.

Rework things to rely on task_curr() -- outside of rq->lock -- which
is rather tricky. Imagine the following scenario where we're trying to
install the first event into our task 't':

CPU0            CPU1            CPU2

                (current == t)

t->perf_event_ctxp[] = ctx;
smp_mb();
cpu = task_cpu(t);

                switch(t, n);
                                migrate(t, 2);
                                switch(p, t);

                                ctx = t->perf_event_ctxp[]; // must not be NULL

smp_function_call(cpu, ..);

                generic_exec_single()
                  func();
                    spin_lock(ctx->lock);
                    if (task_curr(t)) // false

                    add_event_to_ctx();
                    spin_unlock(ctx->lock);

                                perf_event_context_sched_in();
                                  spin_lock(ctx->lock);
                                  // sees event

So its CPU0's store of t->perf_event_ctxp[] that must not go 'missing'.
Because if CPU2's load of that variable were to observe NULL, it would
not try to schedule the ctx and we'd have a task running without its
counter, which would be 'bad'.

As long as we observe !NULL, we'll acquire ctx->lock. If we acquire it
first and not see the event yet, then CPU0 must observe task_curr()
and retry. If the install happens first, then we must see the event on
sched-in and all is well.

I think we can translate the first part (until the 'must not be NULL')
of the scenario to a litmus test like:

  C C-peterz

  {
  }

  P0(int *x, int *y)
  {
          int r1;

          WRITE_ONCE(*x, 1);
          smp_mb();
          r1 = READ_ONCE(*y);
  }

  P1(int *y, int *z)
  {
          WRITE_ONCE(*y, 1);
          smp_store_release(z, 1);
  }

  P2(int *x, int *z)
  {
          int r1;
          int r2;

          r1 = smp_load_acquire(z);
	  smp_mb();
          r2 = READ_ONCE(*x);
  }

  exists
  (0:r1=0 /\ 2:r1=1 /\ 2:r2=0)

Where:
  x is perf_event_ctxp[],
  y is our tasks's CPU, and
  z is our task being placed on the rq of CPU2.

The P0 smp_mb() is the one added by this patch, ordering the store to
perf_event_ctxp[] from find_get_context() and the load of task_cpu()
in task_function_call().

The smp_store_release/smp_load_acquire model the RCpc locking of the
rq->lock and the smp_mb() of P2 is the context switch switching from
whatever CPU2 was running to our task 't'.

This litmus test evaluates into:

  Test C-peterz Allowed
  States 7
  0:r1=0; 2:r1=0; 2:r2=0;
  0:r1=0; 2:r1=0; 2:r2=1;
  0:r1=0; 2:r1=1; 2:r2=1;
  0:r1=1; 2:r1=0; 2:r2=0;
  0:r1=1; 2:r1=0; 2:r2=1;
  0:r1=1; 2:r1=1; 2:r2=0;
  0:r1=1; 2:r1=1; 2:r2=1;
  No
  Witnesses
  Positive: 0 Negative: 7
  Condition exists (0:r1=0 /\ 2:r1=1 /\ 2:r2=0)
  Observation C-peterz Never 0 7
  Hash=e427f41d9146b2a5445101d3e2fcaa34

And the strong and weak model agree.

Reported-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Will Deacon <will.deacon@arm.com>
Cc: jeremy.linton@arm.com
Link: http://lkml.kernel.org/r/20161209135900.GU3174@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 70 ++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 22 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab15509fab8c..72ce7d63e561 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2249,7 +2249,7 @@ static int  __perf_install_in_context(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	bool activate = true;
+	bool reprogram = true;
 	int ret = 0;
 
 	raw_spin_lock(&cpuctx->ctx.lock);
@@ -2257,27 +2257,26 @@ static int  __perf_install_in_context(void *info)
 		raw_spin_lock(&ctx->lock);
 		task_ctx = ctx;
 
-		/* If we're on the wrong CPU, try again */
-		if (task_cpu(ctx->task) != smp_processor_id()) {
+		reprogram = (ctx->task == current);
+
+		/*
+		 * If the task is running, it must be running on this CPU,
+		 * otherwise we cannot reprogram things.
+		 *
+		 * If its not running, we don't care, ctx->lock will
+		 * serialize against it becoming runnable.
+		 */
+		if (task_curr(ctx->task) && !reprogram) {
 			ret = -ESRCH;
 			goto unlock;
 		}
 
-		/*
-		 * If we're on the right CPU, see if the task we target is
-		 * current, if not we don't have to activate the ctx, a future
-		 * context switch will do that for us.
-		 */
-		if (ctx->task != current)
-			activate = false;
-		else
-			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
-
+		WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
 	} else if (task_ctx) {
 		raw_spin_lock(&task_ctx->lock);
 	}
 
-	if (activate) {
+	if (reprogram) {
 		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
 		ctx_resched(cpuctx, task_ctx);
@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
 	/*
 	 * Installing events is tricky because we cannot rely on ctx->is_active
 	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 *
+	 * Instead we use task_curr(), which tells us if the task is running.
+	 * However, since we use task_curr() outside of rq::lock, we can race
+	 * against the actual state. This means the result can be wrong.
+	 *
+	 * If we get a false positive, we retry, this is harmless.
+	 *
+	 * If we get a false negative, things are complicated. If we are after
+	 * perf_event_context_sched_in() ctx::lock will serialize us, and the
+	 * value must be correct. If we're before, it doesn't matter since
+	 * perf_event_context_sched_in() will program the counter.
+	 *
+	 * However, this hinges on the remote context switch having observed
+	 * our task->perf_event_ctxp[] store, such that it will in fact take
+	 * ctx::lock in perf_event_context_sched_in().
+	 *
+	 * We do this by task_function_call(), if the IPI fails to hit the task
+	 * we know any future context switch of task must see the
+	 * perf_event_ctpx[] store.
 	 */
-again:
+
 	/*
-	 * Cannot use task_function_call() because we need to run on the task's
-	 * CPU regardless of whether its current or not.
+	 * This smp_mb() orders the task->perf_event_ctxp[] store with the
+	 * task_cpu() load, such that if the IPI then does not find the task
+	 * running, a future context switch of that task must observe the
+	 * store.
 	 */
-	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+	smp_mb();
+again:
+	if (!task_function_call(task, __perf_install_in_context, event))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -2348,12 +2370,16 @@ again:
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
-	raw_spin_unlock_irq(&ctx->lock);
 	/*
-	 * Since !ctx->is_active doesn't mean anything, we must IPI
-	 * unconditionally.
+	 * If the task is not running, ctx->lock will avoid it becoming so,
+	 * thus we can safely install the event.
 	 */
-	goto again;
+	if (task_curr(task)) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	add_event_to_ctx(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*

From 321027c1fe77f892f4ea07846aeae08cefbbb290 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 11 Jan 2017 21:09:50 +0100
Subject: [PATCH 289/297] perf/core: Fix concurrent sys_perf_event_open() vs.
 'move_group' race

Di Shen reported a race between two concurrent sys_perf_event_open()
calls where both try and move the same pre-existing software group
into a hardware context.

The problem is exactly that described in commit:

  f63a8daa5812 ("perf: Fix event->ctx locking")

... where, while we wait for a ctx->mutex acquisition, the event->ctx
relation can have changed under us.

That very same commit failed to recognise sys_perf_event_context() as an
external access vector to the events and thereby didn't apply the
established locking rules correctly.

So while one sys_perf_event_open() call is stuck waiting on
mutex_lock_double(), the other (which owns said locks) moves the group
about. So by the time the former sys_perf_event_open() acquires the
locks, the context we've acquired is stale (and possibly dead).

Apply the established locking rules as per perf_event_ctx_lock_nested()
to the mutex_lock_double() for the 'move_group' case. This obviously means
we need to validate state after we acquire the locks.

Reported-by: Di Shen (Keen Lab)
Tested-by: John Dias <joaodias@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Min Chong <mchong@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: f63a8daa5812 ("perf: Fix event->ctx locking")
Link: http://lkml.kernel.org/r/20170106131444.GZ3174@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 58 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 72ce7d63e561..cbc5937265da 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9529,6 +9529,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 	return 0;
 }
 
+/*
+ * Variation on perf_event_ctx_lock_nested(), except we take two context
+ * mutexes.
+ */
+static struct perf_event_context *
+__perf_event_ctx_lock_double(struct perf_event *group_leader,
+			     struct perf_event_context *ctx)
+{
+	struct perf_event_context *gctx;
+
+again:
+	rcu_read_lock();
+	gctx = READ_ONCE(group_leader->ctx);
+	if (!atomic_inc_not_zero(&gctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+	if (group_leader->ctx != gctx) {
+		mutex_unlock(&ctx->mutex);
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+		goto again;
+	}
+
+	return gctx;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -9772,12 +9803,31 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		gctx = group_leader->ctx;
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		gctx = __perf_event_ctx_lock_double(group_leader, ctx);
+
 		if (gctx->task == TASK_TOMBSTONE) {
 			err = -ESRCH;
 			goto err_locked;
 		}
+
+		/*
+		 * Check if we raced against another sys_perf_event_open() call
+		 * moving the software group underneath us.
+		 */
+		if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
+			/*
+			 * If someone moved the group out from under us, check
+			 * if this new event wound up on the same ctx, if so
+			 * its the regular !move_group case, otherwise fail.
+			 */
+			if (gctx != ctx) {
+				err = -EINVAL;
+				goto err_locked;
+			} else {
+				perf_event_ctx_unlock(group_leader, gctx);
+				move_group = 0;
+			}
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
@@ -9879,7 +9929,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 
 	if (task) {
@@ -9905,7 +9955,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 err_locked:
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 /* err_file: */
 	fput(event_file);

From 475113d937adfd150eb82b5e2c5507125a68e7af Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 28 Dec 2016 14:31:03 +0100
Subject: [PATCH 290/297] perf/x86/intel: Account interrupts for PEBS errors

It's possible to set up PEBS events to get only errors and not
any data, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lock up, because the error path of the
intel_pmu_drain_pebs_nhm() does not account event->hw.interrupt
for error PEBS interrupts, so in case you're getting ONLY
errors you don't have a way to stop the event when it's over
the max_samples_per_tick limit:

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25

Add perf_event_account_interrupt() which does the interrupt
and frequency checks and call it from intel_pmu_drain_pebs_nhm()'s
error path.

We keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only if
there's any data to deliver.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/ds.c |  6 ++++-
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 47 +++++++++++++++++++++++++-------------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..9dfeeeca0ea8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cbc5937265da..110b38a58493 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
 			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events

From 18e7a45af91acdde99d3aa1372cc40e1f8142f7b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 3 Jan 2017 15:24:54 +0100
Subject: [PATCH 291/297] perf/x86: Reject non sampling events with precise_ip

As Peter suggested [1] rejecting non sampling PEBS events,
because they dont make any sense and could cause bugs
in the NMI handler [2].

  [1] http://lkml.kernel.org/r/20170103094059.GC3093@worktop
  [2] http://lkml.kernel.org/r/1482931866-6018-3-git-send-email-jolsa@kernel.org

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20170103142454.GA26251@krava
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 019c5887b698..1635c0c8df23 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
+
+		/* There's no sense in having PEBS for non sampling events: */
+		if (!is_sampling_event(event))
+			return -EINVAL;
 	}
 	/*
 	 * check that PEBS LBR correction does not conflict with

From c7334ce814f7e5d8fc1f9b3126cda0640c2f81b3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 14 Jan 2017 14:09:03 +0100
Subject: [PATCH 292/297] Revert "driver core: Add deferred_probe attribute to
 devices in sysfs"

This reverts commit 6751667a29d6fd64afb9ce30567ad616b68ed789.

Rob Herring objected to it, and a replacement for it will be added using
debugfs in the future.

Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Reported-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-devices-deferred_probe        | 12 ------------
 drivers/base/base.h                                 |  2 --
 drivers/base/core.c                                 |  7 -------
 drivers/base/dd.c                                   | 13 -------------
 4 files changed, 34 deletions(-)
 delete mode 100644 Documentation/ABI/testing/sysfs-devices-deferred_probe

diff --git a/Documentation/ABI/testing/sysfs-devices-deferred_probe b/Documentation/ABI/testing/sysfs-devices-deferred_probe
deleted file mode 100644
index 58553d7a321f..000000000000
--- a/Documentation/ABI/testing/sysfs-devices-deferred_probe
+++ /dev/null
@@ -1,12 +0,0 @@
-What:		/sys/devices/.../deferred_probe
-Date:		August 2016
-Contact:	Ben Hutchings <ben.hutchings@codethink.co.uk>
-Description:
-		The /sys/devices/.../deferred_probe attribute is
-		present for all devices.  If a driver detects during
-		probing a device that a related device is not yet
-		ready, it may defer probing of the first device.  The
-		kernel will retry probing the first device after any
-		other device is successfully probed.  This attribute
-		reads as 1 if probing of this device is currently
-		deferred, or 0 otherwise.
diff --git a/drivers/base/base.h b/drivers/base/base.h
index ada9dce34e6d..e19b1008e5fb 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -141,8 +141,6 @@ extern void device_unblock_probing(void);
 extern struct kset *devices_kset;
 extern void devices_kset_move_last(struct device *dev);
 
-extern struct device_attribute dev_attr_deferred_probe;
-
 #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS)
 extern void module_add_driver(struct module *mod, struct device_driver *drv);
 extern void module_remove_driver(struct device_driver *drv);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 020ea7f05520..8c25e68e67d7 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1060,14 +1060,8 @@ static int device_add_attrs(struct device *dev)
 			goto err_remove_dev_groups;
 	}
 
-	error = device_create_file(dev, &dev_attr_deferred_probe);
-	if (error)
-		goto err_remove_online;
-
 	return 0;
 
- err_remove_online:
-	device_remove_file(dev, &dev_attr_online);
  err_remove_dev_groups:
 	device_remove_groups(dev, dev->groups);
  err_remove_type_groups:
@@ -1085,7 +1079,6 @@ static void device_remove_attrs(struct device *dev)
 	struct class *class = dev->class;
 	const struct device_type *type = dev->type;
 
-	device_remove_file(dev, &dev_attr_deferred_probe);
 	device_remove_file(dev, &dev_attr_online);
 	device_remove_groups(dev, dev->groups);
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index a8b258e5407b..a1fbf55c4d3a 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -53,19 +53,6 @@ static LIST_HEAD(deferred_probe_pending_list);
 static LIST_HEAD(deferred_probe_active_list);
 static atomic_t deferred_trigger_count = ATOMIC_INIT(0);
 
-static ssize_t deferred_probe_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	bool value;
-
-	mutex_lock(&deferred_probe_mutex);
-	value = !list_empty(&dev->p->deferred_probe);
-	mutex_unlock(&deferred_probe_mutex);
-
-	return sprintf(buf, "%d\n", value);
-}
-DEVICE_ATTR_RO(deferred_probe);
-
 /*
  * In some cases, like suspend to RAM or hibernation, It might be reasonable
  * to prohibit probing of devices as it could be unsafe.

From 0100a3e67a9cef64d72cd3a1da86f3ddbee50363 Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Mon, 12 Dec 2016 18:42:28 -0500
Subject: [PATCH 293/297] efi/x86: Prune invalid memory map entries and fix
 boot regression

Some machines, such as the Lenovo ThinkPad W541 with firmware GNET80WW
(2.28), include memory map entries with phys_addr=0x0 and num_pages=0.

These machines fail to boot after the following commit,

  commit 8e80632fb23f ("efi/esrt: Use efi_mem_reserve() and avoid a kmalloc()")

Fix this by removing such bogus entries from the memory map.

Furthermore, currently the log output for this case (with efi=debug)
looks like:

 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |  |  |  |  |  ] range=[0x0000000000000000-0xffffffffffffffff] (0MB)

This is clearly wrong, and also not as informative as it could be.  This
patch changes it so that if we find obviously invalid memory map
entries, we print an error and skip those entries.  It also detects the
display of the address range calculation overflow, so the new output is:

 [    0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries:
 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |   |  |  |  |  ] range=[0x0000000000000000-0x0000000000000000] (invalid)

It also detects memory map sizes that would overflow the physical
address, for example phys_addr=0xfffffffffffff000 and
num_pages=0x0200000000000001, and prints:

 [    0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries:
 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |   |  |  |  |  ] range=[phys_addr=0xfffffffffffff000-0x20ffffffffffffffff] (invalid)

It then removes these entries from the memory map.

Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[ardb: refactor for clarity with no functional changes, avoid PAGE_SHIFT]
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
[Matt: Include bugzilla info in commit log]
Cc: <stable@vger.kernel.org> # v4.9+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=191121
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/efi.c | 66 +++++++++++++++++++++++++++++++++++++
 include/linux/efi.h         |  1 +
 2 files changed, 67 insertions(+)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 936a488d6cf6..274dfc481849 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -210,6 +210,70 @@ int __init efi_memblock_x86_reserve_range(void)
 	return 0;
 }
 
+#define OVERFLOW_ADDR_SHIFT	(64 - EFI_PAGE_SHIFT)
+#define OVERFLOW_ADDR_MASK	(U64_MAX << OVERFLOW_ADDR_SHIFT)
+#define U64_HIGH_BIT		(~(U64_MAX >> 1))
+
+static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i)
+{
+	u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1;
+	u64 end_hi = 0;
+	char buf[64];
+
+	if (md->num_pages == 0) {
+		end = 0;
+	} else if (md->num_pages > EFI_PAGES_MAX ||
+		   EFI_PAGES_MAX - md->num_pages <
+		   (md->phys_addr >> EFI_PAGE_SHIFT)) {
+		end_hi = (md->num_pages & OVERFLOW_ADDR_MASK)
+			>> OVERFLOW_ADDR_SHIFT;
+
+		if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT))
+			end_hi += 1;
+	} else {
+		return true;
+	}
+
+	pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n");
+
+	if (end_hi) {
+		pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n",
+			i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			md->phys_addr, end_hi, end);
+	} else {
+		pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n",
+			i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			md->phys_addr, end);
+	}
+	return false;
+}
+
+static void __init efi_clean_memmap(void)
+{
+	efi_memory_desc_t *out = efi.memmap.map;
+	const efi_memory_desc_t *in = out;
+	const efi_memory_desc_t *end = efi.memmap.map_end;
+	int i, n_removal;
+
+	for (i = n_removal = 0; in < end; i++) {
+		if (efi_memmap_entry_valid(in, i)) {
+			if (out != in)
+				memcpy(out, in, efi.memmap.desc_size);
+			out = (void *)out + efi.memmap.desc_size;
+		} else {
+			n_removal++;
+		}
+		in = (void *)in + efi.memmap.desc_size;
+	}
+
+	if (n_removal > 0) {
+		u64 size = efi.memmap.nr_map - n_removal;
+
+		pr_warn("Removing %d invalid memory map entries.\n", n_removal);
+		efi_memmap_install(efi.memmap.phys_map, size);
+	}
+}
+
 void __init efi_print_memmap(void)
 {
 	efi_memory_desc_t *md;
@@ -472,6 +536,8 @@ void __init efi_init(void)
 		}
 	}
 
+	efi_clean_memmap();
+
 	if (efi_enabled(EFI_DBG))
 		efi_print_memmap();
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0c5420208c40..5b1af30ece55 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -103,6 +103,7 @@ typedef	struct {
 
 #define EFI_PAGE_SHIFT		12
 #define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
+#define EFI_PAGES_MAX		(U64_MAX >> EFI_PAGE_SHIFT)
 
 typedef struct {
 	u32 type;

From a12f1ae61c489076a9aeb90bddca7722bf330df3 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Tue, 13 Dec 2016 12:09:56 -0800
Subject: [PATCH 294/297] aio: fix lock dep warning

lockdep reports a warnning. file_start_write/file_end_write only
acquire/release the lock for regular files. So checking the files in aio
side too.

[  453.532141] ------------[ cut here ]------------
[  453.533011] WARNING: CPU: 1 PID: 1298 at ../kernel/locking/lockdep.c:3514 lock_release+0x434/0x670
[  453.533011] DEBUG_LOCKS_WARN_ON(depth <= 0)
[  453.533011] Modules linked in:
[  453.533011] CPU: 1 PID: 1298 Comm: fio Not tainted 4.9.0+ #964
[  453.533011] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 04/01/2014
[  453.533011]  ffff8803a24b7a70 ffffffff8196cffb ffff8803a24b7ae8 0000000000000000
[  453.533011]  ffff8803a24b7ab8 ffffffff81091ee1 ffff8803a5dba700 00000dba00000008
[  453.533011]  ffffed0074496f59 ffff8803a5dbaf54 ffff8803ae0f8488 fffffffffffffdef
[  453.533011] Call Trace:
[  453.533011]  [<ffffffff8196cffb>] dump_stack+0x67/0x9c
[  453.533011]  [<ffffffff81091ee1>] __warn+0x111/0x130
[  453.533011]  [<ffffffff81091f97>] warn_slowpath_fmt+0x97/0xb0
[  453.533011]  [<ffffffff81091f00>] ? __warn+0x130/0x130
[  453.533011]  [<ffffffff8191b789>] ? blk_finish_plug+0x29/0x60
[  453.533011]  [<ffffffff811205d4>] lock_release+0x434/0x670
[  453.533011]  [<ffffffff8198af94>] ? import_single_range+0xd4/0x110
[  453.533011]  [<ffffffff81322195>] ? rw_verify_area+0x65/0x140
[  453.533011]  [<ffffffff813aa696>] ? aio_write+0x1f6/0x280
[  453.533011]  [<ffffffff813aa6c9>] aio_write+0x229/0x280
[  453.533011]  [<ffffffff813aa4a0>] ? aio_complete+0x640/0x640
[  453.533011]  [<ffffffff8111df20>] ? debug_check_no_locks_freed+0x1a0/0x1a0
[  453.533011]  [<ffffffff8114793a>] ? debug_lockdep_rcu_enabled.part.2+0x1a/0x30
[  453.533011]  [<ffffffff81147985>] ? debug_lockdep_rcu_enabled+0x35/0x40
[  453.533011]  [<ffffffff812a92be>] ? __might_fault+0x7e/0xf0
[  453.533011]  [<ffffffff813ac9bc>] do_io_submit+0x94c/0xb10
[  453.533011]  [<ffffffff813ac2ae>] ? do_io_submit+0x23e/0xb10
[  453.533011]  [<ffffffff813ac070>] ? SyS_io_destroy+0x270/0x270
[  453.533011]  [<ffffffff8111d7b3>] ? mark_held_locks+0x23/0xc0
[  453.533011]  [<ffffffff8100201a>] ? trace_hardirqs_on_thunk+0x1a/0x1c
[  453.533011]  [<ffffffff813acb90>] SyS_io_submit+0x10/0x20
[  453.533011]  [<ffffffff824f96aa>] entry_SYSCALL_64_fastpath+0x18/0xad
[  453.533011]  [<ffffffff81119190>] ? trace_hardirqs_off_caller+0xc0/0x110
[  453.533011] ---[ end trace b2fbe664d1cc0082 ]---

Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 4ab67e8cb776..873b4ca82ccb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1085,7 +1085,8 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
 		 * Tell lockdep we inherited freeze protection from submission
 		 * thread.
 		 */
-		__sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+		if (S_ISREG(file_inode(file)->i_mode))
+			__sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 		file_end_write(file);
 	}
 
@@ -1525,7 +1526,8 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 		 * by telling it the lock got released so that it doesn't
 		 * complain about held lock when we return to userspace.
 		 */
-		__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+		if (S_ISREG(file_inode(file)->i_mode))
+			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 	}
 	kfree(iovec);
 	return ret;

From 4d22c75d4c7b5c5f4bd31054f09103ee490878fd Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Wed, 11 Jan 2017 13:25:00 -0600
Subject: [PATCH 295/297] coredump: Ensure proper size of sparse core files

If the last section of a core file ends with an unmapped or zero page,
the size of the file does not correspond with the last dump_skip() call.
gdb complains that the file is truncated and can be confusing to users.

After all of the vma sections are written, make sure that the file size
is no smaller than the current file position.

This problem can be demonstrated with gdb's bigcore testcase on the
sparc architecture.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c          |  1 +
 fs/coredump.c            | 18 ++++++++++++++++++
 include/linux/coredump.h |  1 +
 3 files changed, 20 insertions(+)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 29a02daf08a9..422370293cfd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2298,6 +2298,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 				goto end_coredump;
 		}
 	}
+	dump_truncate(cprm);
 
 	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
diff --git a/fs/coredump.c b/fs/coredump.c
index e525b6017cdf..ae6b05629ca1 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -833,3 +833,21 @@ int dump_align(struct coredump_params *cprm, int align)
 	return mod ? dump_skip(cprm, align - mod) : 1;
 }
 EXPORT_SYMBOL(dump_align);
+
+/*
+ * Ensures that file size is big enough to contain the current file
+ * postion. This prevents gdb from complaining about a truncated file
+ * if the last "write" to the file was dump_skip.
+ */
+void dump_truncate(struct coredump_params *cprm)
+{
+	struct file *file = cprm->file;
+	loff_t offset;
+
+	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+		offset = file->f_op->llseek(file, 0, SEEK_CUR);
+		if (i_size_read(file->f_mapping->host) < offset)
+			do_truncate(file->f_path.dentry, offset, 0, file);
+	}
+}
+EXPORT_SYMBOL(dump_truncate);
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d016a121a8c4..28ffa94aed6b 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -14,6 +14,7 @@ struct coredump_params;
 extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 extern int dump_align(struct coredump_params *cprm, int align);
+extern void dump_truncate(struct coredump_params *cprm);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(const siginfo_t *siginfo);
 #else

From b9dc6f65bc5e232d1c05fe34b5daadc7e8bbf1fb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 14 Jan 2017 19:33:08 -0500
Subject: [PATCH 296/297] fix a fencepost error in pipe_advance()

The logics in pipe_advance() used to release all buffers past the new
position failed in cases when the number of buffers to release was equal
to pipe->buffers.  If that happened, none of them had been released,
leaving pipe full.  Worse, it was trivial to trigger and we end up with
pipe full of uninitialized pages.  IOW, it's an infoleak.

Cc: stable@vger.kernel.org # v4.9
Reported-by: "Alan J. Wylie" <alan@wylie.me.uk>
Tested-by: "Alan J. Wylie" <alan@wylie.me.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/iov_iter.c | 66 ++++++++++++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 25f572303801..e68604ae3ced 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -730,43 +730,50 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-static void pipe_advance(struct iov_iter *i, size_t size)
+static inline void pipe_truncate(struct iov_iter *i)
 {
 	struct pipe_inode_info *pipe = i->pipe;
-	struct pipe_buffer *buf;
-	int idx = i->idx;
-	size_t off = i->iov_offset, orig_sz;
-	
-	if (unlikely(i->count < size))
-		size = i->count;
-	orig_sz = size;
-
-	if (size) {
-		if (off) /* make it relative to the beginning of buffer */
-			size += off - pipe->bufs[idx].offset;
-		while (1) {
-			buf = &pipe->bufs[idx];
-			if (size <= buf->len)
-				break;
-			size -= buf->len;
-			idx = next_idx(idx, pipe);
-		}
-		buf->len = size;
-		i->idx = idx;
-		off = i->iov_offset = buf->offset + size;
-	}
-	if (off)
-		idx = next_idx(idx, pipe);
 	if (pipe->nrbufs) {
-		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
-		/* [curbuf,unused) is in use.  Free [idx,unused) */
-		while (idx != unused) {
+		size_t off = i->iov_offset;
+		int idx = i->idx;
+		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
+		if (off) {
+			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
+			idx = next_idx(idx, pipe);
+			nrbufs++;
+		}
+		while (pipe->nrbufs > nrbufs) {
 			pipe_buf_release(pipe, &pipe->bufs[idx]);
 			idx = next_idx(idx, pipe);
 			pipe->nrbufs--;
 		}
 	}
-	i->count -= orig_sz;
+}
+
+static void pipe_advance(struct iov_iter *i, size_t size)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	if (unlikely(i->count < size))
+		size = i->count;
+	if (size) {
+		struct pipe_buffer *buf;
+		size_t off = i->iov_offset, left = size;
+		int idx = i->idx;
+		if (off) /* make it relative to the beginning of buffer */
+			left += off - pipe->bufs[idx].offset;
+		while (1) {
+			buf = &pipe->bufs[idx];
+			if (left <= buf->len)
+				break;
+			left -= buf->len;
+			idx = next_idx(idx, pipe);
+		}
+		i->idx = idx;
+		i->iov_offset = buf->offset + left;
+	}
+	i->count -= size;
+	/* ... and discard everything past that point */
+	pipe_truncate(i);
 }
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
@@ -826,6 +833,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
 			size_t count)
 {
 	BUG_ON(direction != ITER_PIPE);
+	WARN_ON(pipe->nrbufs == pipe->buffers);
 	i->type = direction;
 	i->pipe = pipe;
 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);

From 49def1853334396f948dcb4cedb9347abb318df5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 15 Jan 2017 16:21:59 -0800
Subject: [PATCH 297/297] Linux 4.10-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5f1a84735ff6..96e2352d10a8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Roaring Lionus
 
 # *DOCUMENTATION*