From 5185352c163a72cf969b2fbbfb89801b398896fd Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 9 Aug 2011 14:48:11 -0700
Subject: [PATCH 01/77] libceph: fix msgpool

There were several problems here:

 1- we weren't tagging allocations with the pool, so they were never
    returned to the pool.
 2- msgpool_put didn't add back to the mempool, even it were called.
 3- msgpool_release didn't clear the pool pointer, so it would have looped
    had #1 not been broken.

These may or may not have been responsible for #1136 or #1381 (BUG due to
non-empty mempool on umount).  I can't seem to trigger the crash now using
the method I was using before.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/msgpool.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index d5f2d97ac05c..1f4cb30a42c5 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -7,27 +7,37 @@
 
 #include <linux/ceph/msgpool.h>
 
-static void *alloc_fn(gfp_t gfp_mask, void *arg)
+static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
 {
 	struct ceph_msgpool *pool = arg;
-	void *p;
+	struct ceph_msg *msg;
 
-	p = ceph_msg_new(0, pool->front_len, gfp_mask);
-	if (!p)
-		pr_err("msgpool %s alloc failed\n", pool->name);
-	return p;
+	msg = ceph_msg_new(0, pool->front_len, gfp_mask);
+	if (!msg) {
+		dout("msgpool_alloc %s failed\n", pool->name);
+	} else {
+		dout("msgpool_alloc %s %p\n", pool->name, msg);
+		msg->pool = pool;
+	}
+	return msg;
 }
 
-static void free_fn(void *element, void *arg)
+static void msgpool_free(void *element, void *arg)
 {
-	ceph_msg_put(element);
+	struct ceph_msgpool *pool = arg;
+	struct ceph_msg *msg = element;
+
+	dout("msgpool_release %s %p\n", pool->name, msg);
+	msg->pool = NULL;
+	ceph_msg_put(msg);
 }
 
 int ceph_msgpool_init(struct ceph_msgpool *pool,
 		      int front_len, int size, bool blocking, const char *name)
 {
+	dout("msgpool %s init\n", name);
 	pool->front_len = front_len;
-	pool->pool = mempool_create(size, alloc_fn, free_fn, pool);
+	pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
 	if (!pool->pool)
 		return -ENOMEM;
 	pool->name = name;
@@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool,
 
 void ceph_msgpool_destroy(struct ceph_msgpool *pool)
 {
+	dout("msgpool %s destroy\n", pool->name);
 	mempool_destroy(pool->pool);
 }
 
 struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
 				  int front_len)
 {
+	struct ceph_msg *msg;
+
 	if (front_len > pool->front_len) {
-		pr_err("msgpool_get pool %s need front %d, pool size is %d\n",
+		dout("msgpool_get %s need front %d, pool size is %d\n",
 		       pool->name, front_len, pool->front_len);
 		WARN_ON(1);
 
@@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
 		return ceph_msg_new(0, front_len, GFP_NOFS);
 	}
 
-	return mempool_alloc(pool->pool, GFP_NOFS);
+	msg = mempool_alloc(pool->pool, GFP_NOFS);
+	dout("msgpool_get %s %p\n", pool->name, msg);
+	return msg;
 }
 
 void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
 {
+	dout("msgpool_put %s %p\n", pool->name, msg);
+
 	/* reset msg front_len; user may have changed it */
 	msg->front.iov_len = pool->front_len;
 	msg->hdr.front_len = cpu_to_le32(pool->front_len);
 
 	kref_init(&msg->kref);  /* retake single ref */
+	mempool_free(msg, pool->pool);
 }

From 4f6fdf08681cecd9f38499de7a02eb4f05f399a7 Mon Sep 17 00:00:00 2001
From: Chase Douglas <chase.douglas@canonical.com>
Date: Fri, 5 Aug 2011 09:16:57 -0700
Subject: [PATCH 02/77] HID: magicmouse: Set resolution of touch surfaces

Add touch surface resolution information. The size of the touch surfaces
has been determined to the hundredth of a mm.

Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Michael Poole <mdpoole@troilus.org>
Cc: linux-input@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Chase Douglas <chase.douglas@canonical.com>
[jkosina@suse.cz: update comments and commit message]
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-magicmouse.c | 56 +++++++++++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 10 deletions(-)

diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 0ec91c18a421..b5bdab3299bc 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -81,6 +81,28 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie
 #define NO_TOUCHES -1
 #define SINGLE_TOUCH_UP -2
 
+/* Touch surface information. Dimension is in hundredths of a mm, min and max
+ * are in units. */
+#define MOUSE_DIMENSION_X (float)9056
+#define MOUSE_MIN_X -1100
+#define MOUSE_MAX_X 1258
+#define MOUSE_RES_X ((MOUSE_MAX_X - MOUSE_MIN_X) / (MOUSE_DIMENSION_X / 100))
+#define MOUSE_DIMENSION_Y (float)5152
+#define MOUSE_MIN_Y -1589
+#define MOUSE_MAX_Y 2047
+#define MOUSE_RES_Y ((MOUSE_MAX_Y - MOUSE_MIN_Y) / (MOUSE_DIMENSION_Y / 100))
+
+#define TRACKPAD_DIMENSION_X (float)13000
+#define TRACKPAD_MIN_X -2909
+#define TRACKPAD_MAX_X 3167
+#define TRACKPAD_RES_X \
+	((TRACKPAD_MAX_X - TRACKPAD_MIN_X) / (TRACKPAD_DIMENSION_X / 100))
+#define TRACKPAD_DIMENSION_Y (float)11000
+#define TRACKPAD_MIN_Y -2456
+#define TRACKPAD_MAX_Y 2565
+#define TRACKPAD_RES_Y \
+	((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100))
+
 /**
  * struct magicmouse_sc - Tracks Magic Mouse-specific data.
  * @input: Input device through which we report events.
@@ -406,17 +428,31 @@ static void magicmouse_setup_input(struct input_dev *input, struct hid_device *h
 		 * inverse of the reported Y.
 		 */
 		if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
-			input_set_abs_params(input, ABS_MT_POSITION_X, -1100,
-				1358, 4, 0);
-			input_set_abs_params(input, ABS_MT_POSITION_Y, -1589,
-				2047, 4, 0);
+			input_set_abs_params(input, ABS_MT_POSITION_X,
+				MOUSE_MIN_X, MOUSE_MAX_X, 4, 0);
+			input_set_abs_params(input, ABS_MT_POSITION_Y,
+				MOUSE_MIN_Y, MOUSE_MAX_Y, 4, 0);
+
+			input_abs_set_res(input, ABS_MT_POSITION_X,
+				MOUSE_RES_X);
+			input_abs_set_res(input, ABS_MT_POSITION_Y,
+				MOUSE_RES_Y);
 		} else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
-			input_set_abs_params(input, ABS_X, -2909, 3167, 4, 0);
-			input_set_abs_params(input, ABS_Y, -2456, 2565, 4, 0);
-			input_set_abs_params(input, ABS_MT_POSITION_X, -2909,
-				3167, 4, 0);
-			input_set_abs_params(input, ABS_MT_POSITION_Y, -2456,
-				2565, 4, 0);
+			input_set_abs_params(input, ABS_X, TRACKPAD_MIN_X,
+				TRACKPAD_MAX_X, 4, 0);
+			input_set_abs_params(input, ABS_Y, TRACKPAD_MIN_Y,
+				TRACKPAD_MAX_Y, 4, 0);
+			input_set_abs_params(input, ABS_MT_POSITION_X,
+				TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0);
+			input_set_abs_params(input, ABS_MT_POSITION_Y,
+				TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0);
+
+			input_abs_set_res(input, ABS_X, TRACKPAD_RES_X);
+			input_abs_set_res(input, ABS_Y, TRACKPAD_RES_Y);
+			input_abs_set_res(input, ABS_MT_POSITION_X,
+				TRACKPAD_RES_X);
+			input_abs_set_res(input, ABS_MT_POSITION_Y,
+				TRACKPAD_RES_Y);
 		}
 
 		input_set_events_per_packet(input, 60);

From 971c90bfa2f0b4fe52d6d9002178d547706f1343 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 4 Aug 2011 07:25:35 -0700
Subject: [PATCH 03/77] alarmtimers: Avoid possible null pointer traversal

We don't check if old_setting is non null before assigning it, so
correct this.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: stable@kernel.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/alarmtimer.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 59f369f98a04..1dee3f62a6a7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -479,11 +479,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 	if (!rtcdev)
 		return -ENOTSUPP;
 
-	/* Save old values */
-	old_setting->it_interval =
-			ktime_to_timespec(timr->it.alarmtimer.period);
-	old_setting->it_value =
-			ktime_to_timespec(timr->it.alarmtimer.node.expires);
+	if (old_setting)
+		alarm_timer_get(timr, old_setting);
 
 	/* If the timer was already set, cancel it */
 	alarm_cancel(&timr->it.alarmtimer);

From ea7802f630d356acaf66b3c0b28c00a945fc35dc Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 4 Aug 2011 07:51:56 -0700
Subject: [PATCH 04/77] alarmtimers: Memset itimerspec passed into
 alarm_timer_get

Following common_timer_get, zero out the itimerspec passed in.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: stable@kernel.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/alarmtimer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 1dee3f62a6a7..0e9263f6fd09 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer)
 static void alarm_timer_get(struct k_itimer *timr,
 				struct itimerspec *cur_setting)
 {
+	memset(cur_setting, 0, sizeof(struct itimerspec));
+
 	cur_setting->it_interval =
 			ktime_to_timespec(timr->it.alarmtimer.period);
 	cur_setting->it_value =

From 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 10 Aug 2011 10:26:09 -0700
Subject: [PATCH 05/77] alarmtimers: Avoid possible denial of service with high
 freq periodic timers

Its possible to jam up the alarm timers by setting very small interval
timers, which will cause the alarmtimer subsystem to spend all of its time
firing and restarting timers. This can effectivly lock up a box.

A deeper fix is needed, closely mimicking the hrtimer code, but for now
just cap the interval to 100us to avoid userland hanging the system.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: stable@kernel.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/alarmtimer.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 0e9263f6fd09..ea5e1a928d5b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -481,6 +481,15 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 	if (!rtcdev)
 		return -ENOTSUPP;
 
+	/*
+	 * XXX HACK! Currently we can DOS a system if the interval
+	 * period on alarmtimers is too small. Cap the interval here
+	 * to 100us and solve this properly in a future patch! -jstultz
+	 */
+	if ((new_setting->it_interval.tv_sec == 0) &&
+			(new_setting->it_interval.tv_nsec < 100000))
+		new_setting->it_interval.tv_nsec = 100000;
+
 	if (old_setting)
 		alarm_timer_get(timr, old_setting);
 

From bf6ed027bcc93f8d54d321fe87f0434b25699eb1 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Wed, 10 Aug 2011 21:11:26 +0800
Subject: [PATCH 06/77] rtc: ep93xx: Fix 'rtc' may be used uninitialized
 warning

commit 92d921c5d "rtc: ep93xx: Initialize drvdata before registering device"
ensures the drvdata is initialized prior to registering the rtc device.
But it set the drvdata to an uninitialized pointer.
Thus calling platform_get_drvdata in ep93xx_rtc_remove does not get correct address.

This patch fixes below warning by adding struct rtc_device *rtc to struct ep93xx_rtc.
Then set platform drvdata to ep93xx_rtc instead of rtc.

  CC      drivers/rtc/rtc-ep93xx.o
drivers/rtc/rtc-ep93xx.c: In function 'ep93xx_rtc_probe':
drivers/rtc/rtc-ep93xx.c:154: warning: 'rtc' may be used uninitialized in this function

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/rtc-ep93xx.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index 335551d333b2..14a42a1edc66 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -36,6 +36,7 @@
  */
 struct ep93xx_rtc {
 	void __iomem	*mmio_base;
+	struct rtc_device *rtc;
 };
 
 static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload,
@@ -130,7 +131,6 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 {
 	struct ep93xx_rtc *ep93xx_rtc;
 	struct resource *res;
-	struct rtc_device *rtc;
 	int err;
 
 	ep93xx_rtc = devm_kzalloc(&pdev->dev, sizeof(*ep93xx_rtc), GFP_KERNEL);
@@ -151,12 +151,12 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 		return -ENXIO;
 
 	pdev->dev.platform_data = ep93xx_rtc;
-	platform_set_drvdata(pdev, rtc);
+	platform_set_drvdata(pdev, ep93xx_rtc);
 
-	rtc = rtc_device_register(pdev->name,
+	ep93xx_rtc->rtc = rtc_device_register(pdev->name,
 				&pdev->dev, &ep93xx_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		err = PTR_ERR(rtc);
+	if (IS_ERR(ep93xx_rtc->rtc)) {
+		err = PTR_ERR(ep93xx_rtc->rtc);
 		goto exit;
 	}
 
@@ -167,7 +167,7 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 	return 0;
 
 fail:
-	rtc_device_unregister(rtc);
+	rtc_device_unregister(ep93xx_rtc->rtc);
 exit:
 	platform_set_drvdata(pdev, NULL);
 	pdev->dev.platform_data = NULL;
@@ -176,11 +176,11 @@ exit:
 
 static int __exit ep93xx_rtc_remove(struct platform_device *pdev)
 {
-	struct rtc_device *rtc = platform_get_drvdata(pdev);
+	struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev);
 
 	sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files);
 	platform_set_drvdata(pdev, NULL);
-	rtc_device_unregister(rtc);
+	rtc_device_unregister(ep93xx_rtc->rtc);
 	pdev->dev.platform_data = NULL;
 
 	return 0;

From dec35d19c4ec65b94df3b27b6e373f0d48c9cd32 Mon Sep 17 00:00:00 2001
From: Ilkka Koskinen <ilkka.koskinen@nokia.com>
Date: Wed, 16 Mar 2011 06:07:14 +0000
Subject: [PATCH 07/77] rtc: rtc-twl: Switch to using threaded irq

The driver is accessing to i2c bus in interrupt handler.
Therefore, it should use threaded irq.

Signed-off-by: Ilkka Koskinen <ilkka.koskinen@nokia.com>
Acked-by: Balaji T K <balajitk@ti.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/rtc-twl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 9a81f778d6b2..1963cddbf214 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -462,7 +462,7 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto out1;
 
-	ret = request_irq(irq, twl_rtc_interrupt,
+	ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt,
 				IRQF_TRIGGER_RISING,
 				dev_name(&rtc->dev), rtc);
 	if (ret < 0) {

From 34d623d11316cb69f9e8cc5eb50d3792b5c302b6 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@debian.org>
Date: Tue, 31 May 2011 08:51:39 +0000
Subject: [PATCH 08/77] rtc: rtc-twl: Remove lockdep related local_irq_enable()

Now that the irq is properly threaded (due to it needing i2c access)
we should also remove the local_irq_enable() call in twl_rtc_interrupt.
Testing this with Pandaboard, the RTC is still working.

[Reworked commit message -jstultz]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/rtc-twl.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 1963cddbf214..9677bbc433f9 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -362,14 +362,6 @@ static irqreturn_t twl_rtc_interrupt(int irq, void *rtc)
 	int res;
 	u8 rd_reg;
 
-#ifdef CONFIG_LOCKDEP
-	/* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which
-	 * we don't want and can't tolerate.  Although it might be
-	 * friendlier not to borrow this thread context...
-	 */
-	local_irq_enable();
-#endif
-
 	res = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
 	if (res)
 		goto out;

From 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 22 Jul 2011 09:12:51 +0000
Subject: [PATCH 09/77] rtc: Fix RTC PIE frequency limit

Thomas earlier submitted a fix to limit the RTC PIE freq, but
picked 5000Hz out of the air. Willy noticed that we should
instead use the 8192Hz max from the rtc man documentation.

Cc: Willy Tarreau <w@1wt.eu>
Cc: stable@kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/interface.c | 2 +-
 include/linux/rtc.h     | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 3195dbd3ec34..eb4c88316a15 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
 	int err = 0;
 	unsigned long flags;
 
-	if (freq <= 0 || freq > 5000)
+	if (freq <= 0 || freq > RTC_MAX_FREQ)
 		return -EINVAL;
 retry:
 	spin_lock_irqsave(&rtc->irq_task_lock, flags);
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b27ebea25660..93f4d035076b 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -97,6 +97,9 @@ struct rtc_pll_info {
 #define RTC_AF 0x20	/* Alarm interrupt */
 #define RTC_UF 0x10	/* Update interrupt for 1Hz RTC */
 
+
+#define RTC_MAX_FREQ	8192
+
 #ifdef __KERNEL__
 
 #include <linux/types.h>

From 795858dbd253462a67e14272edeaae73c6074b17 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 15 Aug 2011 13:02:37 -0700
Subject: [PATCH 10/77] ceph: fix encoding of ino only (not relative) paths

A 'path' consists of a starting ino and relative component.  Encode even
when there is no relative component.  This is primarily needed by the
NFS reexport code.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fee028b5332e..86c59e16ba74 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1595,7 +1595,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
 		r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
 		dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
 		     *ppath);
-	} else if (rpath) {
+	} else if (rpath || rino) {
 		*ino = rino;
 		*ppath = rpath;
 		*pathlen = strlen(rpath);

From 016f1c54408b1e92e2e8087bfc05ca0a9c258513 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Thu, 11 Aug 2011 12:29:46 +0200
Subject: [PATCH 11/77] UBIFS: not build debug messages with
 CONFIG_UBIFS_FS_DEBUG disabled

With
  $ grep -e UBIFS_FS_DEBUG -e DYNAMIC_DEBUG .config
  # CONFIG_UBIFS_FS_DEBUG is not set
  CONFIG_DYNAMIC_DEBUG=y

Debug messages are kept in the object files due to the
dynamic_pr_debug() macro, even if they are never going to be printed:
  $ make fs/ubifs/super.o
  $ strings fs/ubifs/super.o | grep 'compiled on'
  compiled on:         Aug 11 2011 at 12:21:38

Use plain printk to fix this.

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@intel.com>
---
 fs/ubifs/debug.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 45174b534377..feb361e252ac 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -335,9 +335,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
 
-#define ubifs_dbg_msg(fmt, ...) do {               \
-	if (0)                                     \
-		pr_debug(fmt "\n", ##__VA_ARGS__); \
+#define ubifs_dbg_msg(fmt, ...) do {                        \
+	if (0)                                              \
+		printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \
 } while (0)
 
 #define dbg_dump_stack()

From 9efabc84768ee8e79b50ad6ad6cff94d66da01f7 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@intel.com>
Date: Fri, 19 Aug 2011 19:02:27 +0300
Subject: [PATCH 12/77] UBI: do not link debug messages when debugging is
 disabled

Michal Marek spotted the same issue in UBIFS and this patch fixes UBI,
see "UBIFS: not build debug messages with CONFIG_UBIFS_FS_DEBUG disabled"

When UBI debugging is disabled, we have debugging messages defined as:

if (0)
	pr_debug()

But pr_debug macro defines data structures with debugging data and makes
the linux binary larger, even though we have "if (0)".

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@intel.com>
---
 drivers/mtd/ubi/debug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 65b5b76cc379..64fbb0021825 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -181,7 +181,7 @@ static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi)
 
 #define ubi_dbg_msg(fmt, ...) do {                                           \
 	if (0)                                                               \
-		pr_debug(fmt "\n", ##__VA_ARGS__);                           \
+		printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__);                  \
 } while (0)
 
 #define dbg_msg(fmt, ...)  ubi_dbg_msg(fmt, ##__VA_ARGS__)

From 259a187ade45056fd44856654f78aa9e9f0f7c75 Mon Sep 17 00:00:00 2001
From: Noah Watkins <noahwatkins@gmail.com>
Date: Mon, 22 Aug 2011 13:49:41 -0600
Subject: [PATCH 13/77] ceph: fix memory leak

kfree does not clean up indirect allocations in
ceph_fs_client and ceph_options (e.g. snapdir_name).

Signed-off-by: Noah Watkins <noahwatkins@gmail.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d47c5ec7fb1f..88bacaf385d9 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -813,8 +813,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
 	fsc = create_fs_client(fsopt, opt);
 	if (IS_ERR(fsc)) {
 		res = ERR_CAST(fsc);
-		kfree(fsopt);
-		kfree(opt);
+		destroy_mount_options(fsopt);
+		ceph_destroy_options(opt);
 		goto out_final;
 	}
 

From 35d851df23b093ee027f827fed2213ae5e88fc7a Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 25 Aug 2011 14:21:37 +0200
Subject: [PATCH 14/77] HID: magicmouse: ignore 'ivalid report id' while
 switching modes, v2

This is basically a more generic respin of 23746a6 ("HID: magicmouse: ignore
'ivalid report id' while switching modes") which got reverted later by
c3a492.

It turns out that on some configurations, this is actually still the case
and we are not able to detect in runtime.

The device reponds with 'invalid report id' when feature report switching it
into multitouch mode is sent to it.

This has been silently ignored before 0825411ade ("HID: bt: Wait for ACK
on Sent Reports"), but since this commit, it propagates -EIO from the _raw
callback .

So let the driver ignore -EIO as response to 0xd7,0x01 report, as that's
how the device reacts in normal mode.

Sad, but following reality.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=35022

Reported-by: Chase Douglas <chase.douglas@canonical.com>
Reported-by: Jaikumar Ganesh <jaikumarg@android.com>
Tested-by: Chase Douglas <chase.douglas@canonical.com>
Tested-by: Jaikumar Ganesh <jaikumarg@android.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-magicmouse.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index b5bdab3299bc..f0fbd7bd239e 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -537,9 +537,17 @@ static int magicmouse_probe(struct hid_device *hdev,
 	}
 	report->size = 6;
 
+	/*
+	 * Some devices repond with 'invalid report id' when feature
+	 * report switching it into multitouch mode is sent to it.
+	 *
+	 * This results in -EIO from the _raw low-level transport callback,
+	 * but there seems to be no other way of switching the mode.
+	 * Thus the super-ugly hacky success check below.
+	 */
 	ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature),
 			HID_FEATURE_REPORT);
-	if (ret != sizeof(feature)) {
+	if (ret != -EIO && ret != sizeof(feature)) {
 		hid_err(hdev, "unable to request touch data (%d)\n", ret);
 		goto err_stop_hw;
 	}

From 6d1db0777981e1626ae71243984ac300b61789ff Mon Sep 17 00:00:00 2001
From: Clemens Werther <clemens.werther@gmail.com>
Date: Thu, 25 Aug 2011 15:35:14 +0200
Subject: [PATCH 15/77] HID: add support for HuiJia USB Gamepad connector

Create each gamepad as a separate joystick

Signed-off-by: Clemens Werther <clemens.werther@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h           | 1 +
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 7d27d2b0445a..7484e1b67249 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -277,6 +277,7 @@
 #define USB_DEVICE_ID_PENPOWER		0x00f4
 
 #define USB_VENDOR_ID_GREENASIA		0x0e8f
+#define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD	0x3013
 
 #define USB_VENDOR_ID_GRETAGMACBETH	0x0971
 #define USB_DEVICE_ID_GRETAGMACBETH_HUEY	0x2005
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 4bdb5d46c52c..3146fdcda272 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -47,6 +47,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016, HID_QUIRK_FULLSPEED_INTERVAL },
 
 	{ USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH, HID_QUIRK_MULTI_INPUT },
+	{ USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK, HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS },
 	{ USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS, HID_QUIRK_MULTI_INPUT },

From a7402deb324f62106566f5a95199a54c41e200ef Mon Sep 17 00:00:00 2001
From: Mike Waychison <mikew@google.com>
Date: Fri, 12 Aug 2011 21:04:30 +0000
Subject: [PATCH 16/77] rtc: Initialized rtc_time->tm_isdst

Even though the Linux kernel does not use the tm_isdst field, it is
exposed as part of the ABI.  This field can accidentally be left
initialized, which is why we currently memset buffers returned to
userland in rtc_read_time.

There is a case however where the field can return garbage from the
stack though when using the RTC_ALM_READ ioctl on the rtc device.  This
ioctl invokes rtc_read_alarm, which is careful to memset the rtc_wkalrm
buffer that is copied to userland, but it then uses a struct copy to
assign to alarm->time given the return value from rtc_ktime_to_tm().

rtc_ktime_to_tm() is implemented by calling rtc_time_to_tm using a
derivative seconds counds from ktime, but rtc_time_to_tm does not assign
a value to ->tm_isdst.  This results in garbage from rtc_ktime_to_tm()'s
frame ending up being copied out to userland as part of the returned
rtc_wkalrm.

Fix this by initializing rtc_time->tm_isdst to 0 in rtc_time_to_tm.

Signed-off-by: Mike Waychison <mikew@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/rtc-lib.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c
index 075f1708deae..c4cf05731118 100644
--- a/drivers/rtc/rtc-lib.c
+++ b/drivers/rtc/rtc-lib.c
@@ -85,6 +85,8 @@ void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
 	time -= tm->tm_hour * 3600;
 	tm->tm_min = time / 60;
 	tm->tm_sec = time - tm->tm_min * 60;
+
+	tm->tm_isdst = 0;
 }
 EXPORT_SYMBOL(rtc_time_to_tm);
 

From 7e72c686347562b4a275c97b4bdd7a79c1f23c65 Mon Sep 17 00:00:00 2001
From: Todd Poynor <toddpoynor@google.com>
Date: Wed, 10 Aug 2011 20:20:36 -0700
Subject: [PATCH 17/77] rtc: twl: Fix registration vs. init order

Only register as an RTC device after the hardware has been
successfully initialized.  The RTC class driver will call
back to this driver to read a pending alarm, and other
drivers watching for new devices on the RTC class may
read the RTC time upon registration.  Such access might
occur while the RTC is stopped, prior to clearing
pending alarms, etc.

The new ordering also avoids leaving the platform
device drvdata set to an unregistered struct rtc_device *
on probe errors.

Signed-off-by: Todd Poynor <toddpoynor@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/rtc-twl.c | 54 ++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 9677bbc433f9..20687d55e7a7 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -420,24 +420,12 @@ static struct rtc_class_ops twl_rtc_ops = {
 static int __devinit twl_rtc_probe(struct platform_device *pdev)
 {
 	struct rtc_device *rtc;
-	int ret = 0;
+	int ret = -EINVAL;
 	int irq = platform_get_irq(pdev, 0);
 	u8 rd_reg;
 
 	if (irq <= 0)
-		return -EINVAL;
-
-	rtc = rtc_device_register(pdev->name,
-				  &pdev->dev, &twl_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		ret = PTR_ERR(rtc);
-		dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
-			PTR_ERR(rtc));
-		goto out0;
-
-	}
-
-	platform_set_drvdata(pdev, rtc);
+		goto out1;
 
 	ret = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
 	if (ret < 0)
@@ -454,14 +442,6 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto out1;
 
-	ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt,
-				IRQF_TRIGGER_RISING,
-				dev_name(&rtc->dev), rtc);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "IRQ is not free.\n");
-		goto out1;
-	}
-
 	if (twl_class_is_6030()) {
 		twl6030_interrupt_unmask(TWL6030_RTC_INT_MASK,
 			REG_INT_MSK_LINE_A);
@@ -472,28 +452,44 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev)
 	/* Check RTC module status, Enable if it is off */
 	ret = twl_rtc_read_u8(&rd_reg, REG_RTC_CTRL_REG);
 	if (ret < 0)
-		goto out2;
+		goto out1;
 
 	if (!(rd_reg & BIT_RTC_CTRL_REG_STOP_RTC_M)) {
 		dev_info(&pdev->dev, "Enabling TWL-RTC.\n");
 		rd_reg = BIT_RTC_CTRL_REG_STOP_RTC_M;
 		ret = twl_rtc_write_u8(rd_reg, REG_RTC_CTRL_REG);
 		if (ret < 0)
-			goto out2;
+			goto out1;
 	}
 
 	/* init cached IRQ enable bits */
 	ret = twl_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG);
 	if (ret < 0)
-		goto out2;
+		goto out1;
 
-	return ret;
+	rtc = rtc_device_register(pdev->name,
+				  &pdev->dev, &twl_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		ret = PTR_ERR(rtc);
+		dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
+			PTR_ERR(rtc));
+		goto out1;
+	}
+
+	ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt,
+				   IRQF_TRIGGER_RISING,
+				   dev_name(&rtc->dev), rtc);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "IRQ is not free.\n");
+		goto out2;
+	}
+
+	platform_set_drvdata(pdev, rtc);
+	return 0;
 
 out2:
-	free_irq(irq, rtc);
-out1:
 	rtc_device_unregister(rtc);
-out0:
+out1:
 	return ret;
 }
 

From 66506f761772c87fd4ff31b94b298888d5d58d77 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Mon, 15 Aug 2011 10:28:18 +0800
Subject: [PATCH 18/77] mmc: sdhci-esdhc-imx: add missing inclusion of
 linux/module.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are the following warnings and errorx when compiling the driver.
The patch adds the missing inclusion of linux/module.h to fix them.

drivers/mmc/host/sdhci-esdhc-imx.c:563:12: error: ‘THIS_MODULE’ undeclared here (not in a function)
[..]

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 0e9780f5a4a9..4dc0028086a3 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -16,6 +16,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/gpio.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>

From c259e01a1ec90063042f758e409cd26b2a0963c8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 22 Jun 2011 19:47:00 +0200
Subject: [PATCH 19/77] sched: Separate the scheduler entry for preemption

Block-IO and workqueues call into notifier functions from the
scheduler core code with interrupts and preemption disabled. These
calls should be made before entering the scheduler core.

To simplify this, separate the scheduler core code into
__schedule(). __schedule() is directly called from the places which
set PREEMPT_ACTIVE and from schedule(). This allows us to add the work
checks into schedule(), so they are only called when a task voluntary
goes to sleep.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@kernel.org # 2.6.39+
Link: http://lkml.kernel.org/r/20110622174918.813258321@linutronix.de
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index ccacdbdecf45..ec15e8129cf7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4279,9 +4279,9 @@ pick_next_task(struct rq *rq)
 }
 
 /*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
  */
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
@@ -4369,6 +4369,11 @@ need_resched:
 	if (need_resched())
 		goto need_resched;
 }
+
+asmlinkage void schedule(void)
+{
+	__schedule();
+}
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4435,7 +4440,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
 
 	do {
 		add_preempt_count_notrace(PREEMPT_ACTIVE);
-		schedule();
+		__schedule();
 		sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
 		/*
@@ -4463,7 +4468,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		local_irq_enable();
-		schedule();
+		__schedule();
 		local_irq_disable();
 		sub_preempt_count(PREEMPT_ACTIVE);
 
@@ -5588,7 +5593,7 @@ static inline int should_resched(void)
 static void __cond_resched(void)
 {
 	add_preempt_count(PREEMPT_ACTIVE);
-	schedule();
+	__schedule();
 	sub_preempt_count(PREEMPT_ACTIVE);
 }
 

From 9c40cef2b799f9b5e7fa5de4d2ad3a0168ba118c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 22 Jun 2011 19:47:01 +0200
Subject: [PATCH 20/77] sched: Move blk_schedule_flush_plug() out of
 __schedule()

There is no real reason to run blk_schedule_flush_plug() with
interrupts and preemption disabled.

Move it into schedule() and call it when the task is going voluntarily
to sleep. There might be false positives when the task is woken
between that call and actually scheduling, but that's not really
different from being woken immediately after switching away.

This fixes a deadlock in the scheduler where the
blk_schedule_flush_plug() callchain enables interrupts and thereby
allows a wakeup to happen of the task that's going to sleep.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@kernel.org # 2.6.39+
Link: http://lkml.kernel.org/n/tip-dwfxtra7yg1b5r65m32ywtct@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index ec15e8129cf7..511732c39b6e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4322,16 +4322,6 @@ need_resched:
 				if (to_wakeup)
 					try_to_wake_up_local(to_wakeup);
 			}
-
-			/*
-			 * If we are going to sleep and we have plugged IO
-			 * queued, make sure to submit it to avoid deadlocks.
-			 */
-			if (blk_needs_flush_plug(prev)) {
-				raw_spin_unlock(&rq->lock);
-				blk_schedule_flush_plug(prev);
-				raw_spin_lock(&rq->lock);
-			}
 		}
 		switch_count = &prev->nvcsw;
 	}
@@ -4370,8 +4360,23 @@ need_resched:
 		goto need_resched;
 }
 
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+	if (!tsk->state)
+		return;
+	/*
+	 * If we are going to sleep and we have plugged IO queued,
+	 * make sure to submit it to avoid deadlocks.
+	 */
+	if (blk_needs_flush_plug(tsk))
+		blk_schedule_flush_plug(tsk);
+}
+
 asmlinkage void schedule(void)
 {
+	struct task_struct *tsk = current;
+
+	sched_submit_work(tsk);
 	__schedule();
 }
 EXPORT_SYMBOL(schedule);

From feff8fa0075bdfd43c841e9d689ed81adda988d6 Mon Sep 17 00:00:00 2001
From: WANG Cong <amwang@redhat.com>
Date: Thu, 18 Aug 2011 20:36:57 +0800
Subject: [PATCH 21/77] sched: Fix a memory leak in __sdt_free()

This patch fixes the following memory leak:

unreferenced object 0xffff880107266800 (size 512):
  comm "sched-powersave", pid 3718, jiffies 4323097853 (age 27495.450s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff81133940>] create_object+0x187/0x28b
    [<ffffffff814ac103>] kmemleak_alloc+0x73/0x98
    [<ffffffff811232ba>] __kmalloc_node+0x104/0x159
    [<ffffffff81044b98>] kzalloc_node.clone.97+0x15/0x17
    [<ffffffff8104cb90>] build_sched_domains+0xb7/0x7f3
    [<ffffffff8104d4df>] partition_sched_domains+0x1db/0x24a
    [<ffffffff8109ee4a>] do_rebuild_sched_domains+0x3b/0x47
    [<ffffffff810a00c7>] rebuild_sched_domains+0x10/0x12
    [<ffffffff8104d5ba>] sched_power_savings_store+0x6c/0x7b
    [<ffffffff8104d5df>] sched_mc_power_savings_store+0x16/0x18
    [<ffffffff8131322c>] sysdev_class_store+0x20/0x22
    [<ffffffff81193876>] sysfs_write_file+0x108/0x144
    [<ffffffff81135b10>] vfs_write+0xaf/0x102
    [<ffffffff81135d23>] sys_write+0x4d/0x74
    [<ffffffff814c8a42>] system_call_fastpath+0x16/0x1b
    [<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org # 3.0
Link: http://lkml.kernel.org/r/1313671017-4112-1-git-send-email-amwang@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sched.c b/kernel/sched.c
index 511732c39b6e..c79e7c63a4aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7453,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
 			struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
 			if (sd && (sd->flags & SD_OVERLAP))
 				free_sched_groups(sd->groups, 0);
+			kfree(*per_cpu_ptr(sdd->sd, j));
 			kfree(*per_cpu_ptr(sdd->sg, j));
 			kfree(*per_cpu_ptr(sdd->sgp, j));
 		}

From a8d757ef076f0f95f13a918808824058de25b3eb Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 25 Aug 2011 15:58:03 +0200
Subject: [PATCH 22/77] perf events: Fix slow and broken cgroup context switch
 code

The current cgroup context switch code was incorrect leading
to bogus counts. Furthermore, as soon as there was an active
cgroup event on a CPU, the context switch cost on that CPU
would increase by a significant amount as demonstrated by a
simple ping/pong example:

 $ ./pong
 Both processes pinned to CPU1, running for 10s
 10684.51 ctxsw/s

Now start a cgroup perf stat:
 $ perf stat -e cycles,cycles -A -a -G test  -C 1 -- sleep 100

$ ./pong
 Both processes pinned to CPU1, running for 10s
 6674.61 ctxsw/s

That's a 37% penalty.

Note that pong is not even in the monitored cgroup.

The results shown by perf stat are bogus:
 $ perf stat -e cycles,cycles -A -a -G test  -C 1 -- sleep 100

 Performance counter stats for 'sleep 100':

 CPU1 <not counted> cycles   test
 CPU1 16,984,189,138 cycles  #    0.000 GHz

The second 'cycles' event should report a count @ CPU clock
(here 2.4GHz) as it is counting across all cgroups.

The patch below fixes the bogus accounting and bypasses any
cgroup switches in case the outgoing and incoming tasks are
in the same cgroup.

With this patch the same test now yields:
 $ ./pong
 Both processes pinned to CPU1, running for 10s
 10775.30 ctxsw/s

Start perf stat with cgroup:

 $ perf stat -e cycles,cycles -A -a -G test  -C 1 -- sleep 10

Run pong outside the cgroup:
 $ /pong
 Both processes pinned to CPU1, running for 10s
 10687.80 ctxsw/s

The penalty is now less than 2%.

And the results for perf stat are correct:

$ perf stat -e cycles,cycles -A -a -G test  -C 1 -- sleep 10

 Performance counter stats for 'sleep 10':

 CPU1 <not counted> cycles test #    0.000 GHz
 CPU1 23,933,981,448 cycles      #    0.000 GHz

Now perf stat reports the correct counts for
for the non cgroup event.

If we run pong inside the cgroup, then we also get the
correct counts:

$ perf stat -e cycles,cycles -A -a -G test  -C 1 -- sleep 10

 Performance counter stats for 'sleep 10':

 CPU1 22,297,726,205 cycles test #    0.000 GHz
 CPU1 23,933,981,448 cycles      #    0.000 GHz

      10.001457237 seconds time elapsed

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110825135803.GA4697@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 24 +++++++++------
 kernel/events/core.c       | 63 ++++++++++++++++++++++++++++++++------
 kernel/sched.c             |  2 +-
 3 files changed, 69 insertions(+), 20 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 245bafdafd5e..c816075c01ce 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -944,8 +944,10 @@ extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
 extern const char *perf_pmu_name(void);
-extern void __perf_event_task_sched_in(struct task_struct *task);
-extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
+extern void __perf_event_task_sched_in(struct task_struct *prev,
+				       struct task_struct *task);
+extern void __perf_event_task_sched_out(struct task_struct *prev,
+					struct task_struct *next);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -1059,17 +1061,20 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 
 extern struct jump_label_key perf_sched_events;
 
-static inline void perf_event_task_sched_in(struct task_struct *task)
+static inline void perf_event_task_sched_in(struct task_struct *prev,
+					    struct task_struct *task)
 {
 	if (static_branch(&perf_sched_events))
-		__perf_event_task_sched_in(task);
+		__perf_event_task_sched_in(prev, task);
 }
 
-static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
+static inline void perf_event_task_sched_out(struct task_struct *prev,
+					     struct task_struct *next)
 {
 	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
 
-	__perf_event_task_sched_out(task, next);
+	if (static_branch(&perf_sched_events))
+		__perf_event_task_sched_out(prev, next);
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1139,10 +1144,11 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else
 static inline void
-perf_event_task_sched_in(struct task_struct *task)			{ }
+perf_event_task_sched_in(struct task_struct *prev,
+			 struct task_struct *task)			{ }
 static inline void
-perf_event_task_sched_out(struct task_struct *task,
-			    struct task_struct *next)			{ }
+perf_event_task_sched_out(struct task_struct *prev,
+			  struct task_struct *next)			{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8785e26ee1c..45847fbb599a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 	local_irq_restore(flags);
 }
 
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+					 struct task_struct *next)
 {
-	perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+	struct perf_cgroup *cgrp1;
+	struct perf_cgroup *cgrp2 = NULL;
+
+	/*
+	 * we come here when we know perf_cgroup_events > 0
+	 */
+	cgrp1 = perf_cgroup_from_task(task);
+
+	/*
+	 * next is NULL when called from perf_event_enable_on_exec()
+	 * that will systematically cause a cgroup_switch()
+	 */
+	if (next)
+		cgrp2 = perf_cgroup_from_task(next);
+
+	/*
+	 * only schedule out current cgroup events if we know
+	 * that we are switching to a different cgroup. Otherwise,
+	 * do no touch the cgroup events.
+	 */
+	if (cgrp1 != cgrp2)
+		perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
 }
 
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+					struct task_struct *task)
 {
-	perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+	struct perf_cgroup *cgrp1;
+	struct perf_cgroup *cgrp2 = NULL;
+
+	/*
+	 * we come here when we know perf_cgroup_events > 0
+	 */
+	cgrp1 = perf_cgroup_from_task(task);
+
+	/* prev can never be NULL */
+	cgrp2 = perf_cgroup_from_task(prev);
+
+	/*
+	 * only need to schedule in cgroup events if we are changing
+	 * cgroup during ctxsw. Cgroup events were not scheduled
+	 * out of ctxsw out if that was not the case.
+	 */
+	if (cgrp1 != cgrp2)
+		perf_cgroup_switch(task, PERF_CGROUP_SWIN);
 }
 
 static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
 {
 }
 
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+					 struct task_struct *next)
 {
 }
 
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+					struct task_struct *task)
 {
 }
 
@@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
 	 * cgroup event are system-wide mode only
 	 */
 	if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
-		perf_cgroup_sched_out(task);
+		perf_cgroup_sched_out(task, next);
 }
 
 static void task_ctx_sched_out(struct perf_event_context *ctx)
@@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
  * accessing the event control register. If a NMI hits, then it will
  * keep the event running.
  */
-void __perf_event_task_sched_in(struct task_struct *task)
+void __perf_event_task_sched_in(struct task_struct *prev,
+				struct task_struct *task)
 {
 	struct perf_event_context *ctx;
 	int ctxn;
@@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task)
 	 * cgroup event are system-wide mode only
 	 */
 	if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
-		perf_cgroup_sched_in(task);
+		perf_cgroup_sched_in(prev, task);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 	 * ctxswin cgroup events which are already scheduled
 	 * in.
 	 */
-	perf_cgroup_sched_out(current);
+	perf_cgroup_sched_out(current, NULL);
 
 	raw_spin_lock(&ctx->lock);
 	task_ctx_sched_out(ctx);
diff --git a/kernel/sched.c b/kernel/sched.c
index ccacdbdecf45..0408cdc6d572 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	local_irq_disable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
-	perf_event_task_sched_in(current);
+	perf_event_task_sched_in(prev, current);
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */

From 3b217116edaac634bf31e85c35708298059a8171 Mon Sep 17 00:00:00 2001
From: Duncan Sands <baldrick@free.fr>
Date: Tue, 30 Aug 2011 10:58:22 +0200
Subject: [PATCH 23/77] KVM: Fix instruction size issue in pvclock scaling

Commit de2d1a524e94 ("KVM: Fix register corruption in pvclock_scale_delta")
introduced a mul instruction that may have only a memory operand; the
assembler therefore cannot select the correct size:

   pvclock.s:229: Error: no instruction mnemonic suffix given and no register
operands; can't size instruction

In this example the assembler is:

         #APP
         mul -48(%rbp) ; shrd $32, %rdx, %rax
         #NO_APP

A simple solution is to use mulq.

Signed-off-by: Duncan Sands <baldrick@free.fr>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/pvclock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index a518c0a45044..c59cc97fe6c1 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -44,7 +44,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
 #elif defined(__x86_64__)
 	__asm__ (
-		"mul %[mul_frac] ; shrd $32, %[hi], %[lo]"
+		"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
 		: [lo]"=a"(product),
 		  [hi]"=d"(tmp)
 		: "0"(delta),

From 7f310a5d4e8525ac0cc2f58c973d2100ce034410 Mon Sep 17 00:00:00 2001
From: Eric B Munson <emunson@mgebm.net>
Date: Thu, 23 Jun 2011 16:34:38 -0400
Subject: [PATCH 24/77] perf_event: Fix broken calc_timer_values()

We detected a serious issue with PERF_SAMPLE_READ and
timing information when events were being multiplexing.

Samples would have time_running > time_enabled. That
was easy to reproduce with a libpfm4 example (ran 3
times to cause multiplexing on Core 2):

 $ syst_smpl -e uops_retired:freq=1 &
 $ syst_smpl -e uops_retired:freq=1 &
 $ syst_smpl -e uops_retired:freq=1 &
 IIP:0x0000000040062d ... PERIOD:2355332948 ENA=40144625315 RUN=60014875184
 syst_smpl: WARNING: time_running > time_enabled
	63277537998 uops_retired:freq=1 , scaled

The bug was not present in kernel up to (and including) 3.0. It turns
out the bug was introduced by the following commit:

commit c4794295917ebeda8013b6cb9c8d71ab4f74a1fa

    events: Move lockless timer calculation into helper function

The parameters of the function got reversed yet the call sites
were not updated to reflect the change. That lead to time_running
and time_enabled being swapped. That had no effect when there was
no multiplexing because in that case time_running = time_enabled
but it would show up in any other scenario.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110829124112.GA4828@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 45847fbb599a..0f857782d06f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3396,8 +3396,8 @@ static int perf_event_index(struct perf_event *event)
 }
 
 static void calc_timer_values(struct perf_event *event,
-				u64 *running,
-				u64 *enabled)
+				u64 *enabled,
+				u64 *running)
 {
 	u64 now, ctx_time;
 

From 20afc60f892d285fde179ead4b24e6a7938c2f1b Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Tue, 30 Aug 2011 12:32:36 +0400
Subject: [PATCH 25/77] x86, perf: Check that current->mm is alive before
 getting user callchain

An event may occur when an mm is already released.

I added an event in dequeue_entity() and caught a panic with
the following backtrace:

[  434.421110] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050
[  434.421258] IP: [<ffffffff810464ac>] __get_user_pages_fast+0x9c/0x120
...
[  434.421258] Call Trace:
[  434.421258]  [<ffffffff8101ae81>] copy_from_user_nmi+0x51/0xf0
[  434.421258]  [<ffffffff8109a0d5>] ? sched_clock_local+0x25/0x90
[  434.421258]  [<ffffffff8101b048>] perf_callchain_user+0x128/0x170
[  434.421258]  [<ffffffff811154cd>] ? __perf_event_header__init_id+0xed/0x100
[  434.421258]  [<ffffffff81116690>] perf_prepare_sample+0x200/0x280
[  434.421258]  [<ffffffff81118da8>] __perf_event_overflow+0x1b8/0x290
[  434.421258]  [<ffffffff81065240>] ? tg_shares_up+0x0/0x670
[  434.421258]  [<ffffffff8104fe1a>] ? walk_tg_tree+0x6a/0xb0
[  434.421258]  [<ffffffff81118f44>] perf_swevent_overflow+0xc4/0xf0
[  434.421258]  [<ffffffff81119150>] do_perf_sw_event+0x1e0/0x250
[  434.421258]  [<ffffffff81119204>] perf_tp_event+0x44/0x70
[  434.421258]  [<ffffffff8105701f>] ftrace_profile_sched_block+0xdf/0x110
[  434.421258]  [<ffffffff8106121d>] dequeue_entity+0x2ad/0x2d0
[  434.421258]  [<ffffffff810614ec>] dequeue_task_fair+0x1c/0x60
[  434.421258]  [<ffffffff8105818a>] dequeue_task+0x9a/0xb0
[  434.421258]  [<ffffffff810581e2>] deactivate_task+0x42/0xe0
[  434.421258]  [<ffffffff814bc019>] thread_return+0x191/0x808
[  434.421258]  [<ffffffff81098a44>] ? switch_task_namespaces+0x24/0x60
[  434.421258]  [<ffffffff8106f4c4>] do_exit+0x464/0x910
[  434.421258]  [<ffffffff8106f9c8>] do_group_exit+0x58/0xd0
[  434.421258]  [<ffffffff8106fa57>] sys_exit_group+0x17/0x20
[  434.421258]  [<ffffffff8100b202>] system_call_fastpath+0x16/0x1b

Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org
Link: http://lkml.kernel.org/r/1314693156-24131-1-git-send-email-avagin@openvz.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4ee3abf20ed6..cfa62ec090ec 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1900,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	perf_callchain_store(entry, regs->ip);
 
+	if (!current->mm)
+		return;
+
 	if (perf_callchain_user32(regs, entry))
 		return;
 

From 08c14071fda4e69abb9d5b1566651cd092b158d3 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 18 Aug 2011 15:23:47 +0300
Subject: [PATCH 26/77] mmc: rename mmc_host_clk_{ungate|gate} to
 mmc_host_clk_{hold|release}

As per suggestion by Linus Walleij:

  > If you think the names of the functions are confusing then
  > you may rename them, say like this:
  >
  > mmc_host_clk_ungate() -> mmc_host_clk_hold()
  > mmc_host_clk_gate() -> mmc_host_clk_release()
  >
  > Which would make the usecases more clear

(This is CC'd to stable@ because the next two patches, which fix
observable races, depend on it.)

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: <stable@kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c |  4 ++--
 drivers/mmc/core/host.c | 10 +++++-----
 drivers/mmc/core/host.h |  8 ++++----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 91a0a7460ebb..63ffc65f84af 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -133,7 +133,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 		if (mrq->done)
 			mrq->done(mrq);
 
-		mmc_host_clk_gate(host);
+		mmc_host_clk_release(host);
 	}
 }
 
@@ -192,7 +192,7 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 			mrq->stop->mrq = mrq;
 		}
 	}
-	mmc_host_clk_ungate(host);
+	mmc_host_clk_hold(host);
 	led_trigger_event(host->led, LED_FULL);
 	host->ops->request(host, mrq);
 }
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index b29d3e8fd3a2..96a26b2bf5f0 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -119,14 +119,14 @@ static void mmc_host_clk_gate_work(struct work_struct *work)
 }
 
 /**
- *	mmc_host_clk_ungate - ungate hardware MCI clocks
+ *	mmc_host_clk_hold - ungate hardware MCI clocks
  *	@host: host to ungate.
  *
  *	Makes sure the host ios.clock is restored to a non-zero value
  *	past this call.	Increase clock reference count and ungate clock
  *	if we're the first user.
  */
-void mmc_host_clk_ungate(struct mmc_host *host)
+void mmc_host_clk_hold(struct mmc_host *host)
 {
 	unsigned long flags;
 
@@ -164,14 +164,14 @@ static bool mmc_host_may_gate_card(struct mmc_card *card)
 }
 
 /**
- *	mmc_host_clk_gate - gate off hardware MCI clocks
+ *	mmc_host_clk_release - gate off hardware MCI clocks
  *	@host: host to gate.
  *
  *	Calls the host driver with ios.clock set to zero as often as possible
  *	in order to gate off hardware MCI clocks. Decrease clock reference
  *	count and schedule disabling of clock.
  */
-void mmc_host_clk_gate(struct mmc_host *host)
+void mmc_host_clk_release(struct mmc_host *host)
 {
 	unsigned long flags;
 
@@ -231,7 +231,7 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
 	if (cancel_work_sync(&host->clk_gate_work))
 		mmc_host_clk_gate_delayed(host);
 	if (host->clk_gated)
-		mmc_host_clk_ungate(host);
+		mmc_host_clk_hold(host);
 	/* There should be only one user now */
 	WARN_ON(host->clk_requests > 1);
 }
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index de199f911928..fb8a5cd2e4a1 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -16,16 +16,16 @@ int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
 
 #ifdef CONFIG_MMC_CLKGATE
-void mmc_host_clk_ungate(struct mmc_host *host);
-void mmc_host_clk_gate(struct mmc_host *host);
+void mmc_host_clk_hold(struct mmc_host *host);
+void mmc_host_clk_release(struct mmc_host *host);
 unsigned int mmc_host_clk_rate(struct mmc_host *host);
 
 #else
-static inline void mmc_host_clk_ungate(struct mmc_host *host)
+static inline void mmc_host_clk_hold(struct mmc_host *host)
 {
 }
 
-static inline void mmc_host_clk_gate(struct mmc_host *host)
+static inline void mmc_host_clk_release(struct mmc_host *host)
 {
 }
 

From 778e277cb82411c9002ca28ccbd216c4d9eb9158 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 18 Aug 2011 15:23:48 +0300
Subject: [PATCH 27/77] mmc: core: prevent aggressive clock gating racing with
 ios updates

We have seen at least two different races when clock gating kicks in in a
middle of ios structure update.

First one happens when ios->clock is changed outside of aggressive clock
gating framework, for example via mmc_set_clock(). The race might happen
when we run following code:

mmc_set_ios():
	...
	if (ios->clock > 0)
		mmc_set_ungated(host);

Now if gating kicks in right after the condition check we end up setting
host->clk_gated to false even though we have just gated the clock. Next
time a request is started we try to ungate and restore the clock in
mmc_host_clk_hold(). However since we have host->clk_gated set to false the
original clock is not restored.

This eventually will cause the host controller to hang since its clock is
disabled while we are trying to issue a request. For example on Intel
Medfield platform we see:

[   13.818610] mmc2: Timeout waiting for hardware interrupt.
[   13.818698] sdhci: =========== REGISTER DUMP (mmc2)===========
[   13.818753] sdhci: Sys addr: 0x00000000 | Version:  0x00008901
[   13.818804] sdhci: Blk size: 0x00000000 | Blk cnt:  0x00000000
[   13.818853] sdhci: Argument: 0x00000000 | Trn mode: 0x00000000
[   13.818903] sdhci: Present:  0x1fff0000 | Host ctl: 0x00000001
[   13.818951] sdhci: Power:    0x0000000d | Blk gap:  0x00000000
[   13.819000] sdhci: Wake-up:  0x00000000 | Clock:    0x00000000
[   13.819049] sdhci: Timeout:  0x00000000 | Int stat: 0x00000000
[   13.819098] sdhci: Int enab: 0x00ff00c3 | Sig enab: 0x00ff00c3
[   13.819147] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000
[   13.819196] sdhci: Caps:     0x6bee32b2 | Caps_1:   0x00000000
[   13.819245] sdhci: Cmd:      0x00000000 | Max curr: 0x00000000
[   13.819292] sdhci: Host ctl2: 0x00000000
[   13.819331] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x00000000
[   13.819377] sdhci: ===========================================
[   13.919605] mmc2: Reset 0x2 never completed.

and it never recovers.

Second race might happen while running mmc_power_off():

static void mmc_power_off(struct mmc_host *host)
{
	host->ios.clock = 0;
	host->ios.vdd = 0;

[ clock gating kicks in here ]

	/*
	 * Reset ocr mask to be the highest possible voltage supported for
	 * this mmc host. This value will be used at next power up.
	 */
	host->ocr = 1 << (fls(host->ocr_avail) - 1);

	if (!mmc_host_is_spi(host)) {
		host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN;
		host->ios.chip_select = MMC_CS_DONTCARE;
	}
	host->ios.power_mode = MMC_POWER_OFF;
	host->ios.bus_width = MMC_BUS_WIDTH_1;
	host->ios.timing = MMC_TIMING_LEGACY;
	mmc_set_ios(host);
}

If the clock gating worker kicks in while we are only partially updated the
ios structure the host controller gets incomplete ios and might not work as
supposed. Again on Intel Medfield platform we get:

[    4.185349] kernel BUG at drivers/mmc/host/sdhci.c:1155!
[    4.185422] invalid opcode: 0000 [#1] PREEMPT SMP
[    4.185509] Modules linked in:
[    4.185565]
[    4.185608] Pid: 4, comm: kworker/0:0 Not tainted 3.0.0+ #240 Intel Corporation Medfield/iCDKA
[    4.185742] EIP: 0060:[<c136364e>] EFLAGS: 00010083 CPU: 0
[    4.185827] EIP is at sdhci_set_power+0x3e/0xd0
[    4.185891] EAX: f5ff98e0 EBX: f5ff98e0 ECX: 00000000 EDX: 00000001
[    4.185970] ESI: f5ff977c EDI: f5ff9904 EBP: f644fe98 ESP: f644fe94
[    4.186049]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[    4.186125] Process kworker/0:0 (pid: 4, ti=f644e000 task=f644c0e0 task.ti=f644e000)
[    4.186219] Stack:
[    4.186257]  f5ff98e0 f644feb0 c1365173 00000282 f5ff9460 f5ff96e0 f5ff96e0 f644feec
[    4.186418]  c1355bd8 f644c0e0 c1499c3d f5ff96e0 f644fed4 00000006 f5ff96e0 00000286
[    4.186579]  f644fedc c107922b f644feec 00000286 f5ff9460 f5ff9700 f644ff10 c135839e
[    4.186739] Call Trace:
[    4.186802]  [<c1365173>] sdhci_set_ios+0x1c3/0x340
[    4.186883]  [<c1355bd8>] mmc_gate_clock+0x68/0x120
[    4.186963]  [<c1499c3d>] ? _raw_spin_unlock_irqrestore+0x4d/0x60
[    4.187052]  [<c107922b>] ? trace_hardirqs_on+0xb/0x10
[    4.187134]  [<c135839e>] mmc_host_clk_gate_delayed+0xbe/0x130
[    4.187219]  [<c105ec09>] ? process_one_work+0xf9/0x5b0
[    4.187300]  [<c135841d>] mmc_host_clk_gate_work+0xd/0x10
[    4.187379]  [<c105ec82>] process_one_work+0x172/0x5b0
[    4.187457]  [<c105ec09>] ? process_one_work+0xf9/0x5b0
[    4.187538]  [<c1358410>] ? mmc_host_clk_gate_delayed+0x130/0x130
[    4.187625]  [<c105f3c8>] worker_thread+0x118/0x330
[    4.187700]  [<c1496cee>] ? preempt_schedule+0x2e/0x50
[    4.187779]  [<c105f2b0>] ? rescuer_thread+0x1f0/0x1f0
[    4.187857]  [<c1062cf4>] kthread+0x74/0x80
[    4.187931]  [<c1062c80>] ? __init_kthread_worker+0x60/0x60
[    4.188015]  [<c149acfa>] kernel_thread_helper+0x6/0xd
[    4.188079] Code: 81 fa 00 00 04 00 0f 84 a7 00 00 00 7f 21 81 fa 80 00 00 00 0f 84 92 00 00 00 81 fa 00 00 0
[    4.188780] EIP: [<c136364e>] sdhci_set_power+0x3e/0xd0 SS:ESP 0068:f644fe94
[    4.188898] ---[ end trace a7b23eecc71777e4 ]---

This BUG() comes from the fact that ios.power_mode was still in previous
value (MMC_POWER_ON) and ios.vdd was set to zero.

We prevent these by inhibiting the clock gating while we update the ios
structure.

Both problems can be reproduced by simply running the device in a reboot
loop.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Chris Ball <cjb@laptop.org>
Cc: <stable@kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 63ffc65f84af..b27b94078c21 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -728,15 +728,17 @@ static inline void mmc_set_ios(struct mmc_host *host)
  */
 void mmc_set_chip_select(struct mmc_host *host, int mode)
 {
+	mmc_host_clk_hold(host);
 	host->ios.chip_select = mode;
 	mmc_set_ios(host);
+	mmc_host_clk_release(host);
 }
 
 /*
  * Sets the host clock to the highest possible frequency that
  * is below "hz".
  */
-void mmc_set_clock(struct mmc_host *host, unsigned int hz)
+static void __mmc_set_clock(struct mmc_host *host, unsigned int hz)
 {
 	WARN_ON(hz < host->f_min);
 
@@ -747,6 +749,13 @@ void mmc_set_clock(struct mmc_host *host, unsigned int hz)
 	mmc_set_ios(host);
 }
 
+void mmc_set_clock(struct mmc_host *host, unsigned int hz)
+{
+	mmc_host_clk_hold(host);
+	__mmc_set_clock(host, hz);
+	mmc_host_clk_release(host);
+}
+
 #ifdef CONFIG_MMC_CLKGATE
 /*
  * This gates the clock by setting it to 0 Hz.
@@ -779,7 +788,7 @@ void mmc_ungate_clock(struct mmc_host *host)
 	if (host->clk_old) {
 		BUG_ON(host->ios.clock);
 		/* This call will also set host->clk_gated to false */
-		mmc_set_clock(host, host->clk_old);
+		__mmc_set_clock(host, host->clk_old);
 	}
 }
 
@@ -807,8 +816,10 @@ void mmc_set_ungated(struct mmc_host *host)
  */
 void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode)
 {
+	mmc_host_clk_hold(host);
 	host->ios.bus_mode = mode;
 	mmc_set_ios(host);
+	mmc_host_clk_release(host);
 }
 
 /*
@@ -816,8 +827,10 @@ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode)
  */
 void mmc_set_bus_width(struct mmc_host *host, unsigned int width)
 {
+	mmc_host_clk_hold(host);
 	host->ios.bus_width = width;
 	mmc_set_ios(host);
+	mmc_host_clk_release(host);
 }
 
 /**
@@ -1015,8 +1028,10 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr)
 
 		ocr &= 3 << bit;
 
+		mmc_host_clk_hold(host);
 		host->ios.vdd = bit;
 		mmc_set_ios(host);
+		mmc_host_clk_release(host);
 	} else {
 		pr_warning("%s: host doesn't support card's voltages\n",
 				mmc_hostname(host));
@@ -1063,8 +1078,10 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11
  */
 void mmc_set_timing(struct mmc_host *host, unsigned int timing)
 {
+	mmc_host_clk_hold(host);
 	host->ios.timing = timing;
 	mmc_set_ios(host);
+	mmc_host_clk_release(host);
 }
 
 /*
@@ -1072,8 +1089,10 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing)
  */
 void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type)
 {
+	mmc_host_clk_hold(host);
 	host->ios.drv_type = drv_type;
 	mmc_set_ios(host);
+	mmc_host_clk_release(host);
 }
 
 /*
@@ -1091,6 +1110,8 @@ static void mmc_power_up(struct mmc_host *host)
 {
 	int bit;
 
+	mmc_host_clk_hold(host);
+
 	/* If ocr is set, we use it */
 	if (host->ocr)
 		bit = ffs(host->ocr) - 1;
@@ -1126,10 +1147,14 @@ static void mmc_power_up(struct mmc_host *host)
 	 * time required to reach a stable voltage.
 	 */
 	mmc_delay(10);
+
+	mmc_host_clk_release(host);
 }
 
 static void mmc_power_off(struct mmc_host *host)
 {
+	mmc_host_clk_hold(host);
+
 	host->ios.clock = 0;
 	host->ios.vdd = 0;
 
@@ -1147,6 +1172,8 @@ static void mmc_power_off(struct mmc_host *host)
 	host->ios.bus_width = MMC_BUS_WIDTH_1;
 	host->ios.timing = MMC_TIMING_LEGACY;
 	mmc_set_ios(host);
+
+	mmc_host_clk_release(host);
 }
 
 /*

From 50a50f9248497484c678631a9c1a719f1aaeab79 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 18 Aug 2011 15:23:49 +0300
Subject: [PATCH 28/77] mmc: core: use non-reentrant workqueue for clock gating

The default multithread workqueue can cause the same work to be executed
concurrently on a different CPUs. This isn't really suitable for clock
gating as it might already gated the clock and gating it twice results both
host->clk_old and host->ios.clock to be set to 0.

To prevent this from happening we use system_nrt_wq instead.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Chris Ball <cjb@laptop.org>
Cc: <stable@kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/host.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 96a26b2bf5f0..793d0a0dad8d 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -179,7 +179,7 @@ void mmc_host_clk_release(struct mmc_host *host)
 	host->clk_requests--;
 	if (mmc_host_may_gate_card(host->card) &&
 	    !host->clk_requests)
-		schedule_work(&host->clk_gate_work);
+		queue_work(system_nrt_wq, &host->clk_gate_work);
 	spin_unlock_irqrestore(&host->clk_lock, flags);
 }
 

From b91df1593e361109f1fe665ce17c5e87ca60582b Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Fri, 19 Aug 2011 10:07:07 +0900
Subject: [PATCH 29/77] mmc: sdhi: initialise mmc_data->flags before use

This corrects a logic error that I introduced in
"mmc: sdhi: Add write16_hook"

Reported-by: Magnus Damm <magnus.damm@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 774f6439d7ce..0c4a672f5db6 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -120,11 +120,11 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
 	mmc_data->hclk = clk_get_rate(priv->clk);
 	mmc_data->set_pwr = sh_mobile_sdhi_set_pwr;
 	mmc_data->get_cd = sh_mobile_sdhi_get_cd;
-	if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT)
-		mmc_data->write16_hook = sh_mobile_sdhi_write16_hook;
 	mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED;
 	if (p) {
 		mmc_data->flags = p->tmio_flags;
+		if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT)
+			mmc_data->write16_hook = sh_mobile_sdhi_write16_hook;
 		mmc_data->ocr_mask = p->tmio_ocr_mask;
 		mmc_data->capabilities |= p->tmio_caps;
 

From 93c712f99d8e412b2d297edfe9f59b90636897c1 Mon Sep 17 00:00:00 2001
From: Subhash Jadavani <subhashj@codeaurora.org>
Date: Tue, 9 Aug 2011 12:19:31 +0530
Subject: [PATCH 30/77] mmc: sd: UHS-I bus speed should be set last in UHS
 initialization

mmc_sd_init_uhs_card function sets the driver type, current limit
and bus speed mode on card as well as on host controller side.

Currently bus speed mode is set by sending CMD6 to card and
immediately setting the timing mode in host controller. But
then before initiating tuning sequence, it also tries to set
current limit by sending CMD6 to card which results in data
timeout errors in controller if bus speed mode is SDR50/SDR104 mode.

So basically bus speed mode should be set only after current limit
is set in the card and immediately after setting the bus speed mode,
tuning sequence should be initiated.

Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
Reviewed-by: Arindam Nath <arindam.nath@amd.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c | 81 ++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 633975ff2bb3..0370e03e3142 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -469,56 +469,75 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 	return 0;
 }
 
-static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status)
+static void sd_update_bus_speed_mode(struct mmc_card *card)
 {
-	unsigned int bus_speed = 0, timing = 0;
-	int err;
-
 	/*
 	 * If the host doesn't support any of the UHS-I modes, fallback on
 	 * default speed.
 	 */
 	if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
-	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)))
-		return 0;
+	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))) {
+		card->sd_bus_speed = 0;
+		return;
+	}
 
 	if ((card->host->caps & MMC_CAP_UHS_SDR104) &&
 	    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) {
-			bus_speed = UHS_SDR104_BUS_SPEED;
-			timing = MMC_TIMING_UHS_SDR104;
-			card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+			card->sd_bus_speed = UHS_SDR104_BUS_SPEED;
 	} else if ((card->host->caps & MMC_CAP_UHS_DDR50) &&
 		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) {
-			bus_speed = UHS_DDR50_BUS_SPEED;
-			timing = MMC_TIMING_UHS_DDR50;
-			card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+			card->sd_bus_speed = UHS_DDR50_BUS_SPEED;
 	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
 		    MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode &
 		    SD_MODE_UHS_SDR50)) {
-			bus_speed = UHS_SDR50_BUS_SPEED;
-			timing = MMC_TIMING_UHS_SDR50;
-			card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+			card->sd_bus_speed = UHS_SDR50_BUS_SPEED;
 	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
 		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) &&
 		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) {
-			bus_speed = UHS_SDR25_BUS_SPEED;
-			timing = MMC_TIMING_UHS_SDR25;
-			card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+			card->sd_bus_speed = UHS_SDR25_BUS_SPEED;
 	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
 		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 |
 		    MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode &
 		    SD_MODE_UHS_SDR12)) {
-			bus_speed = UHS_SDR12_BUS_SPEED;
-			timing = MMC_TIMING_UHS_SDR12;
-			card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+			card->sd_bus_speed = UHS_SDR12_BUS_SPEED;
+	}
+}
+
+static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status)
+{
+	int err;
+	unsigned int timing = 0;
+
+	switch (card->sd_bus_speed) {
+	case UHS_SDR104_BUS_SPEED:
+		timing = MMC_TIMING_UHS_SDR104;
+		card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+		break;
+	case UHS_DDR50_BUS_SPEED:
+		timing = MMC_TIMING_UHS_DDR50;
+		card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+		break;
+	case UHS_SDR50_BUS_SPEED:
+		timing = MMC_TIMING_UHS_SDR50;
+		card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+		break;
+	case UHS_SDR25_BUS_SPEED:
+		timing = MMC_TIMING_UHS_SDR25;
+		card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+		break;
+	case UHS_SDR12_BUS_SPEED:
+		timing = MMC_TIMING_UHS_SDR12;
+		card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+		break;
+	default:
+		return 0;
 	}
 
-	card->sd_bus_speed = bus_speed;
-	err = mmc_sd_switch(card, 1, 0, bus_speed, status);
+	err = mmc_sd_switch(card, 1, 0, card->sd_bus_speed, status);
 	if (err)
 		return err;
 
-	if ((status[16] & 0xF) != bus_speed)
+	if ((status[16] & 0xF) != card->sd_bus_speed)
 		printk(KERN_WARNING "%s: Problem setting bus speed mode!\n",
 			mmc_hostname(card->host));
 	else {
@@ -618,18 +637,24 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
 	}
 
+	/*
+	 * Select the bus speed mode depending on host
+	 * and card capability.
+	 */
+	sd_update_bus_speed_mode(card);
+
 	/* Set the driver strength for the card */
 	err = sd_select_driver_type(card, status);
 	if (err)
 		goto out;
 
-	/* Set bus speed mode of the card */
-	err = sd_set_bus_speed_mode(card, status);
+	/* Set current limit for the card */
+	err = sd_set_current_limit(card, status);
 	if (err)
 		goto out;
 
-	/* Set current limit for the card */
-	err = sd_set_current_limit(card, status);
+	/* Set bus speed mode of the card */
+	err = sd_set_bus_speed_mode(card, status);
 	if (err)
 		goto out;
 

From 49bb1e619568ec84785ceb366f07db2a6f0b64cc Mon Sep 17 00:00:00 2001
From: Girish K S <girish.shivananjappa@linaro.org>
Date: Fri, 26 Aug 2011 14:58:18 +0530
Subject: [PATCH 31/77] mmc: sdhci-s3c: Fix mmc card I/O problem

This patch fixes the problem in sdhci-s3c host driver for Samsung Soc's.
During the card identification stage the mmc core driver enumerates for
the best bus width in combination with the highest available data rate.
It starts enumerating from the highest bus width (8) to lowest width (1).

In case of few MMC cards the 4-bit bus enumeration fails and tries
the 1-bit bus enumeration. When switched to 1-bit bus mode the host driver
has to clear the previous bus width setting and apply the new setting.

The current patch will clear the previous bus mode and apply the new
mode setting.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Cc: <stable@kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-s3c.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 2bd7bf4fece7..fe886d6c474a 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -302,6 +302,8 @@ static int sdhci_s3c_platform_8bit_width(struct sdhci_host *host, int width)
 		ctrl &= ~SDHCI_CTRL_8BITBUS;
 		break;
 	default:
+		ctrl &= ~SDHCI_CTRL_4BITBUS;
+		ctrl &= ~SDHCI_CTRL_8BITBUS;
 		break;
 	}
 

From aca420bc51f48b0701963ba3a6234442a0cabebd Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 31 Aug 2011 14:45:53 -0700
Subject: [PATCH 32/77] libceph: fix leak of osd structs during shutdown

We want to remove all OSDs, not just those on the idle LRU.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ce310eee708d..16836a7df7a6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -685,6 +685,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
 	put_osd(osd);
 }
 
+static void remove_all_osds(struct ceph_osd_client *osdc)
+{
+	dout("__remove_old_osds %p\n", osdc);
+	mutex_lock(&osdc->request_mutex);
+	while (!RB_EMPTY_ROOT(&osdc->osds)) {
+		struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
+						struct ceph_osd, o_node);
+		__remove_osd(osdc, osd);
+	}
+	mutex_unlock(&osdc->request_mutex);
+}
+
 static void __move_osd_to_lru(struct ceph_osd_client *osdc,
 			      struct ceph_osd *osd)
 {
@@ -701,14 +713,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd)
 		list_del_init(&osd->o_osd_lru);
 }
 
-static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
+static void remove_old_osds(struct ceph_osd_client *osdc)
 {
 	struct ceph_osd *osd, *nosd;
 
 	dout("__remove_old_osds %p\n", osdc);
 	mutex_lock(&osdc->request_mutex);
 	list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
-		if (!remove_all && time_before(jiffies, osd->lru_ttl))
+		if (time_before(jiffies, osd->lru_ttl))
 			break;
 		__remove_osd(osdc, osd);
 	}
@@ -751,6 +763,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new)
 	struct rb_node *parent = NULL;
 	struct ceph_osd *osd = NULL;
 
+	dout("__insert_osd %p osd%d\n", new, new->o_osd);
 	while (*p) {
 		parent = *p;
 		osd = rb_entry(parent, struct ceph_osd, o_node);
@@ -1144,7 +1157,7 @@ static void handle_osds_timeout(struct work_struct *work)
 
 	dout("osds timeout\n");
 	down_read(&osdc->map_sem);
-	remove_old_osds(osdc, 0);
+	remove_old_osds(osdc);
 	up_read(&osdc->map_sem);
 
 	schedule_delayed_work(&osdc->osds_timeout_work,
@@ -1862,8 +1875,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 		ceph_osdmap_destroy(osdc->osdmap);
 		osdc->osdmap = NULL;
 	}
-	remove_old_osds(osdc, 1);
-	WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
+	remove_all_osds(osdc);
 	mempool_destroy(osdc->req_mempool);
 	ceph_msgpool_destroy(&osdc->msgpool_op);
 	ceph_msgpool_destroy(&osdc->msgpool_op_reply);

From d312ae878b6aed3912e1acaaf5d0b2a9d08a4f11 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Fri, 19 Aug 2011 15:57:16 +0100
Subject: [PATCH 33/77] xen: use maximum reservation to limit amount of usable
 RAM

Use the domain's maximum reservation to limit the amount of extra RAM
for the memory balloon. This reduces the size of the pages tables and
the amount of reserved low memory (which defaults to about 1/32 of the
total RAM).

On a system with 8 GiB of RAM with the domain limited to 1 GiB the
kernel reports:

Before:

Memory: 627792k/4472000k available

After:

Memory: 549740k/11132224k available

A increase of about 76 MiB (~1.5% of the unused 7 GiB).  The reserved
low memory is also reduced from 253 MiB to 32 MiB.  The total
additional usable RAM is 329 MiB.

For dom0, this requires at patch to Xen ('x86: use 'dom0_mem' to limit
the number of pages for dom0') (c/s 23790)

CC: stable@kernel.org
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 02ffd9e48c9f..ff3dfa176814 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -183,6 +183,19 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
 					PFN_UP(start_pci), PFN_DOWN(last));
 	return identity;
 }
+
+static unsigned long __init xen_get_max_pages(void)
+{
+	unsigned long max_pages = MAX_DOMAIN_PAGES;
+	domid_t domid = DOMID_SELF;
+	int ret;
+
+	ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+	if (ret > 0)
+		max_pages = ret;
+	return min(max_pages, MAX_DOMAIN_PAGES);
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
@@ -291,6 +304,12 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
+	extra_limit = xen_get_max_pages();
+	if (extra_limit >= max_pfn)
+		extra_pages = extra_limit - max_pfn;
+	else
+		extra_pages = 0;
+
 	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
 
 	/*

From d198d499148a0c64a41b3aba9e7dd43772832b91 Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Thu, 1 Sep 2011 13:46:55 +0200
Subject: [PATCH 34/77] xen: x86_32: do not enable iterrupts when returning
 from exception in interrupt context

If vmalloc page_fault happens inside of interrupt handler with interrupts
disabled then on exit path from exception handler when there is no pending
interrupts, the following code (arch/x86/xen/xen-asm_32.S:112):

	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
	sete XEN_vcpu_info_mask(%eax)

will enable interrupts even if they has been previously disabled according to
eflags from the bounce frame (arch/x86/xen/xen-asm_32.S:99)

	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
	setz XEN_vcpu_info_mask(%eax)

Solution is in setting XEN_vcpu_info_mask only when it should be set
according to
	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
but not clearing it if there isn't any pending events.

Reproducer for bug is attached to RHBZ 707552

CC: stable@kernel.org
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/xen-asm_32.S | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 22a2093b5862..b040b0e518ca 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -113,11 +113,13 @@ xen_iret_start_crit:
 
 	/*
 	 * If there's something pending, mask events again so we can
-	 * jump back into xen_hypervisor_callback
+	 * jump back into xen_hypervisor_callback. Otherwise do not
+	 * touch XEN_vcpu_info_mask.
 	 */
-	sete XEN_vcpu_info_mask(%eax)
+	jne 1f
+	movb $1, XEN_vcpu_info_mask(%eax)
 
-	popl %eax
+1:	popl %eax
 
 	/*
 	 * From this point on the registers are restored and the stack

From ed467e69f16e6b480e2face7bc5963834d025f91 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 1 Sep 2011 09:48:27 -0400
Subject: [PATCH 35/77] xen/smp: Warn user why they keel over - nosmp or noapic
 and what to use instead.

We have hit a couple of customer bugs where they would like to
use those parameters to run an UP kernel - but both of those
options turn of important sources of interrupt information so
we end up not being able to boot. The correct way is to
pass in 'dom0_max_vcpus=1' on the Xen hypervisor line and
the kernel will patch itself to be a UP kernel.

Fixes bug: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=637308

CC: stable@kernel.org
Acked-by: Ian Campbell <Ian.Campbell@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/smp.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e79dbb95482b..d4fc6d454f8d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,6 +32,7 @@
 #include <xen/page.h>
 #include <xen/events.h>
 
+#include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "mmu.h"
 
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	unsigned cpu;
 	unsigned int i;
 
+	if (skip_ioapic_setup) {
+		char *m = (max_cpus == 0) ?
+			"The nosmp parameter is incompatible with Xen; " \
+			"use Xen dom0_max_vcpus=1 parameter" :
+			"The noapic parameter is incompatible with Xen";
+
+		xen_raw_printk(m);
+		panic(m);
+	}
 	xen_init_lock_cpu(0);
 
 	smp_store_cpu_info(0);

From f1ca1512e765337a7c09eb875eedef8ea4e07654 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 2 Sep 2011 14:10:32 +0200
Subject: [PATCH 36/77] iommu/amd: Make sure iommu->need_sync contains correct
 value

The value is only set to true but never set back to false,
which causes to many completion-wait commands to be sent to
hardware. Fix it with this patch.

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index a14f8dc23462..45652231dae8 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -605,7 +605,9 @@ static void build_inv_all(struct iommu_cmd *cmd)
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command.
  */
-static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+static int iommu_queue_command_sync(struct amd_iommu *iommu,
+				    struct iommu_cmd *cmd,
+				    bool sync)
 {
 	u32 left, tail, head, next_tail;
 	unsigned long flags;
@@ -639,13 +641,18 @@ again:
 	copy_cmd_to_buffer(iommu, cmd, tail);
 
 	/* We need to sync now to make sure all commands are processed */
-	iommu->need_sync = true;
+	iommu->need_sync = sync;
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return 0;
 }
 
+static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+{
+	return iommu_queue_command_sync(iommu, cmd, true);
+}
+
 /*
  * This function queues a completion wait command into the command
  * buffer of an IOMMU
@@ -661,7 +668,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	build_completion_wait(&cmd, (u64)&sem);
 
-	ret = iommu_queue_command(iommu, &cmd);
+	ret = iommu_queue_command_sync(iommu, &cmd, false);
 	if (ret)
 		return ret;
 

From e33acde91140f1809952d1c135c36feb66a51887 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 2 Sep 2011 14:19:50 +0200
Subject: [PATCH 37/77] iommu/amd: Don't take domain->lock recursivly

The domain_flush_devices() function takes the domain->lock.
But this function is only called from update_domain() which
itself is already called unter the domain->lock. This causes
a deadlock situation when the dma-address-space of a domain
grows larger than 1GB.

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 45652231dae8..0e4227f457af 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -847,14 +847,9 @@ static void domain_flush_complete(struct protection_domain *domain)
 static void domain_flush_devices(struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&domain->lock, flags);
 
 	list_for_each_entry(dev_data, &domain->dev_list, list)
 		device_flush_dte(dev_data);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /****************************************************************************

From fb492c9160f3d40d09456a79cc669fba74d7d9cc Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 30 Aug 2011 17:45:10 +0100
Subject: [PATCH 38/77] ARM: 7067/1: mm: keep significant bits in pfn_valid

When ARCH_HAS_HOLES_MEMORYMODEL is selected, pfn_valid calls
memblock_is_memory to test validity of a pfn:

> memblock_is_memory(pfn << PAGE_SHIFT);

On LPAE systems this cuts off the top bits, as the shift occurs before
the value is promoted to a phys_addr_t.

This patch replaces the shift with a call to __pfn_to_phys (which casts
pfn to phys_addr_t before shifting), preventing the loss of significant
bits.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 91bca355cd31..cc7e2d8be9aa 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -298,7 +298,7 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
 #ifdef CONFIG_HAVE_ARCH_PFN_VALID
 int pfn_valid(unsigned long pfn)
 {
-	return memblock_is_memory(pfn << PAGE_SHIFT);
+	return memblock_is_memory(__pfn_to_phys(pfn));
 }
 EXPORT_SYMBOL(pfn_valid);
 #endif

From 1df726ef0a700587a712a3660b2caa8e533c7de9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 5 Sep 2011 08:58:29 +0100
Subject: [PATCH 39/77] NET: am79c961: fix race in link status code

The link status code operates from a timer, and writes the index
register without first taking a lock.  A well-placed interrupt
between writing the index register and reading the data register
could change the index register on us, which will return wrong data.
Add the necessary lock.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/net/arm/am79c961a.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c
index 52fe21e1e2cd..3b1416e3d217 100644
--- a/drivers/net/arm/am79c961a.c
+++ b/drivers/net/arm/am79c961a.c
@@ -308,8 +308,11 @@ static void am79c961_timer(unsigned long data)
 	struct net_device *dev = (struct net_device *)data;
 	struct dev_priv *priv = netdev_priv(dev);
 	unsigned int lnkstat, carrier;
+	unsigned long flags;
 
+	spin_lock_irqsave(&priv->chip_lock, flags);
 	lnkstat = read_ireg(dev->base_addr, ISALED0) & ISALED0_LNKST;
+	spin_unlock_irqrestore(&priv->chip_lock, flags);
 	carrier = netif_carrier_ok(dev);
 
 	if (lnkstat && !carrier) {

From da063d260969c4e5e5f91d911ba87f7f6b48ead0 Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@linaro.org>
Date: Mon, 29 Aug 2011 13:33:32 +0200
Subject: [PATCH 40/77] dmaengine/ste_dma40: add missing kernel doc for
 pending_queue

Signed-off-by: Per Forlin <per.forlin@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ste_dma40.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index cd3a7c726bf8..486b6c0b44e3 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -174,6 +174,7 @@ struct d40_base;
  * @tasklet: Tasklet that gets scheduled from interrupt context to complete a
  * transfer and call client callback.
  * @client: Cliented owned descriptor list.
+ * @pending_queue: Submitted jobs, to be issued by issue_pending()
  * @active: Active descriptor.
  * @queue: Queued jobs.
  * @dma_cfg: The client configuration of this dma channel.

From 3b3d5b0f855b3eec45a02832e97c3c1890ff8823 Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@linaro.org>
Date: Mon, 29 Aug 2011 13:33:33 +0200
Subject: [PATCH 41/77] dmaengine/ste_dma40: remove duplicate call to
 d40_pool_lli_free().

d40_desc_free() already calls d40_pool_lli_free().

Signed-off-by: Per Forlin <per.forlin@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ste_dma40.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 486b6c0b44e3..37388d10497a 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -478,7 +478,6 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
 
 		list_for_each_entry_safe(d, _d, &d40c->client, node)
 			if (async_tx_test_ack(&d->txd)) {
-				d40_pool_lli_free(d40c, d);
 				d40_desc_remove(d);
 				desc = d;
 				memset(desc, 0, sizeof(*desc));
@@ -1209,7 +1208,6 @@ static void dma_tasklet(unsigned long data)
 
 	if (!d40d->cyclic) {
 		if (async_tx_test_ack(&d40d->txd)) {
-			d40_pool_lli_free(d40c, d40d);
 			d40_desc_remove(d40d);
 			d40_desc_free(d40c, d40d);
 		} else {
@@ -1606,7 +1604,6 @@ static int d40_free_dma(struct d40_chan *d40c)
 	/* Release client owned descriptors */
 	if (!list_empty(&d40c->client))
 		list_for_each_entry_safe(d, _d, &d40c->client, node) {
-			d40_pool_lli_free(d40c, d);
 			d40_desc_remove(d);
 			d40_desc_free(d40c, d);
 		}

From 7404368c22b4910ab839238e48d96be45180f6fc Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@linaro.org>
Date: Mon, 29 Aug 2011 13:33:34 +0200
Subject: [PATCH 42/77] dmaengine/ste_dma40: fix Oops due to double free of
 client descriptor

The client list may exist in two lists at the same time. This makes free
fail since the same desc is freed multiple times. Remove desc from
client list when adding it to the pending queue. Move free of client owned
descriptors from free_dma() to terminate_all().

Unable to handle kernel paging request at virtual address 00100104
pgd = dea8c000
[00100104] *pgd=1ea62831, *pte=00000000, *ppte=00000000
Internal error: Oops: 817 [#1] PREEMPT SMP
Modules linked in:
CPU: 0    Not tainted  (3.1.0-rc3+ #58)
PC is at d40_free_chan_resources+0x64/0x330

Signed-off-by: Per Forlin <per.forlin@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ste_dma40.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 37388d10497a..92ec0a26401a 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -644,8 +644,11 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
 	return d;
 }
 
+/* remove desc from current queue and add it to the pending_queue */
 static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
 {
+	d40_desc_remove(desc);
+	desc->is_in_client_list = false;
 	list_add_tail(&desc->node, &d40c->pending_queue);
 }
 
@@ -803,6 +806,7 @@ done:
 static void d40_term_all(struct d40_chan *d40c)
 {
 	struct d40_desc *d40d;
+	struct d40_desc *_d;
 
 	/* Release active descriptors */
 	while ((d40d = d40_first_active_get(d40c))) {
@@ -822,6 +826,14 @@ static void d40_term_all(struct d40_chan *d40c)
 		d40_desc_free(d40c, d40d);
 	}
 
+	/* Release client owned descriptors */
+	if (!list_empty(&d40c->client))
+		list_for_each_entry_safe(d40d, _d, &d40c->client, node) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		}
+
+
 	d40c->pending_tx = 0;
 	d40c->busy = false;
 }
@@ -1594,20 +1606,10 @@ static int d40_free_dma(struct d40_chan *d40c)
 	u32 event;
 	struct d40_phy_res *phy = d40c->phy_chan;
 	bool is_src;
-	struct d40_desc *d;
-	struct d40_desc *_d;
-
 
 	/* Terminate all queued and active transfers */
 	d40_term_all(d40c);
 
-	/* Release client owned descriptors */
-	if (!list_empty(&d40c->client))
-		list_for_each_entry_safe(d, _d, &d40c->client, node) {
-			d40_desc_remove(d);
-			d40_desc_free(d40c, d);
-		}
-
 	if (phy == NULL) {
 		chan_err(d40c, "phy == null\n");
 		return -EINVAL;

From 82babbb361f207a80cffa8ac34c2b6a0b62acc88 Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@linaro.org>
Date: Mon, 29 Aug 2011 13:33:35 +0200
Subject: [PATCH 43/77] dmaengine/ste_dma40: fix memory leak due to prepared
 descriptors

Prepared descriptors that are not submitted will not be freed. Add
prepared descriptor to a list to be able to release them upon
dmaengine_terminate_all().

Signed-off-by: Per Forlin <per.forlin@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/ste_dma40.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 92ec0a26401a..467e4dcb20a0 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -177,6 +177,7 @@ struct d40_base;
  * @pending_queue: Submitted jobs, to be issued by issue_pending()
  * @active: Active descriptor.
  * @queue: Queued jobs.
+ * @prepare_queue: Prepared jobs.
  * @dma_cfg: The client configuration of this dma channel.
  * @configured: whether the dma_cfg configuration is valid
  * @base: Pointer to the device instance struct.
@@ -204,6 +205,7 @@ struct d40_chan {
 	struct list_head		 pending_queue;
 	struct list_head		 active;
 	struct list_head		 queue;
+	struct list_head		 prepare_queue;
 	struct stedma40_chan_cfg	 dma_cfg;
 	bool				 configured;
 	struct d40_base			*base;
@@ -833,6 +835,13 @@ static void d40_term_all(struct d40_chan *d40c)
 			d40_desc_free(d40c, d40d);
 		}
 
+	/* Release descriptors in prepare queue */
+	if (!list_empty(&d40c->prepare_queue))
+		list_for_each_entry_safe(d40d, _d,
+					 &d40c->prepare_queue, node) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		}
 
 	d40c->pending_tx = 0;
 	d40c->busy = false;
@@ -1911,6 +1920,12 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
 		goto err;
 	}
 
+	/*
+	 * add descriptor to the prepare queue in order to be able
+	 * to free them later in terminate_all
+	 */
+	list_add_tail(&desc->node, &chan->prepare_queue);
+
 	spin_unlock_irqrestore(&chan->lock, flags);
 
 	return &desc->txd;
@@ -2400,6 +2415,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
 		INIT_LIST_HEAD(&d40c->queue);
 		INIT_LIST_HEAD(&d40c->pending_queue);
 		INIT_LIST_HEAD(&d40c->client);
+		INIT_LIST_HEAD(&d40c->prepare_queue);
 
 		tasklet_init(&d40c->tasklet, dma_tasklet,
 			     (unsigned long) d40c);

From 5204f5e3f5b3c706e52682590de5974a82ea54f9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 5 Sep 2011 08:07:47 -0700
Subject: [PATCH 44/77] regmap: Remove bitrotted module_put()s

The conversion to per bus type registration functions means we don't need
to do module_get()s to hold the bus types in memory (their users will link
to them) so we removed all those calls. This left module_put() calls in
the cleanup paths which aren't needed and which cause unbalanced puts if
we ever try to unload anything.

Reported-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 0eef4da1ac61..20663f8dae45 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -168,13 +168,11 @@ struct regmap *regmap_init(struct device *dev,
 	map->work_buf = kmalloc(map->format.buf_size, GFP_KERNEL);
 	if (map->work_buf == NULL) {
 		ret = -ENOMEM;
-		goto err_bus;
+		goto err_map;
 	}
 
 	return map;
 
-err_bus:
-	module_put(map->bus->owner);
 err_map:
 	kfree(map);
 err:
@@ -188,7 +186,6 @@ EXPORT_SYMBOL_GPL(regmap_init);
 void regmap_exit(struct regmap *map)
 {
 	kfree(map->work_buf);
-	module_put(map->bus->owner);
 	kfree(map);
 }
 EXPORT_SYMBOL_GPL(regmap_exit);

From 5441ae5eb3614d3c28f77073370738a2820c88e4 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 25 Jul 2011 18:06:32 +0000
Subject: [PATCH 45/77] fs/9p: Add fid before dentry instantiation

d_instantiate marks the dentry positive. So a parallel lookup and mkdir of
the directory can find dentry that doesn't have fid attached. This can result
in both the code path doing v9fs_fid_add which results in v9fs_dentry leak.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c      | 4 +---
 fs/9p/vfs_inode_dotl.c | 8 ++++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8bb5507e822f..43dd540663af 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -645,13 +645,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
-
+	d_instantiate(dentry, inode);
 	return ofid;
-
 error:
 	if (ofid)
 		p9_client_clunk(ofid);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index b6c8ed205192..0ca224c8bb60 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -281,10 +281,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
+	d_instantiate(dentry, inode);
 
 	/* Now set the ACL based on the default value */
 	v9fs_set_create_acl(dentry, &dacl, &pacl);
@@ -403,10 +403,10 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 				err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*
@@ -657,10 +657,10 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 					err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/* Not in cached mode. No need to populate inode with stat */
@@ -810,10 +810,10 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 				err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*

From 45089142b1497dab2327d60f6c71c40766fc3ea4 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 25 Jul 2011 18:06:33 +0000
Subject: [PATCH 46/77] fs/9p: Don't update file type when updating file
 attributes

We should only update attributes that we can change on stat2inode.
Also do file type initialization in v9fs_init_inode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs_vfs.h       |  4 +-
 fs/9p/vfs_inode.c      | 91 +++++++++++++++++++++++-------------------
 fs/9p/vfs_inode_dotl.c | 23 +++++++----
 fs/9p/vfs_super.c      |  2 +-
 4 files changed, 68 insertions(+), 52 deletions(-)

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 46ce357ca1ab..7ac1faec2bde 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
 
 struct inode *v9fs_alloc_inode(struct super_block *sb);
 void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode);
+		    struct inode *inode, int mode, dev_t);
 void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 43dd540663af..3563cace0a2e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
 /**
  * p9mode2unixmode- convert plan9 mode bits to unix mode bits
  * @v9ses: v9fs session information
- * @mode: mode to convert
+ * @stat: p9_wstat from which mode need to be derived
+ * @rdev: major number, minor number in case of device files.
  *
  */
-
-static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+static int p9mode2unixmode(struct v9fs_session_info *v9ses,
+			   struct p9_wstat *stat, dev_t *rdev)
 {
 	int res;
+	int mode = stat->mode;
 
-	res = mode & 0777;
+	res = mode & S_IALLUGO;
+	*rdev = 0;
 
 	if ((mode & P9_DMDIR) == P9_DMDIR)
 		res |= S_IFDIR;
@@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		 && (v9ses->nodev == 0))
 		res |= S_IFIFO;
 	else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
-		 && (v9ses->nodev == 0))
-		res |= S_IFBLK;
-	else
+		 && (v9ses->nodev == 0)) {
+		char type = 0, ext[32];
+		int major = -1, minor = -1;
+
+		strncpy(ext, stat->extension, sizeof(ext));
+		sscanf(ext, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			res |= S_IFCHR;
+			break;
+		case 'b':
+			res |= S_IFBLK;
+			break;
+		default:
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				"Unknown special type %c %s\n", type,
+				stat->extension);
+		};
+		*rdev = MKDEV(major, minor);
+	} else
 		res |= S_IFREG;
 
 	if (v9fs_proto_dotu(v9ses)) {
@@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
 			res |= S_ISVTX;
 	}
-
 	return res;
 }
 
@@ -242,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode)
 }
 
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode)
+		    struct inode *inode, int mode, dev_t rdev)
 {
 	int err = 0;
 
 	inode_init_owner(inode, NULL, mode);
 	inode->i_blocks = 0;
-	inode->i_rdev = 0;
+	inode->i_rdev = rdev;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_mapping->a_ops = &v9fs_addr_operations;
 
@@ -335,7 +354,7 @@ error:
  *
  */
 
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
 {
 	int err;
 	struct inode *inode;
@@ -348,7 +367,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	err = v9fs_init_inode(v9ses, inode, mode);
+	err = v9fs_init_inode(v9ses, inode, mode, rdev);
 	if (err) {
 		iput(inode);
 		return ERR_PTR(err);
@@ -435,11 +454,12 @@ void v9fs_evict_inode(struct inode *inode)
 static int v9fs_test_inode(struct inode *inode, void *data)
 {
 	int umode;
+	dev_t rdev;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 	struct p9_wstat *st = (struct p9_wstat *)data;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 
-	umode = p9mode2unixmode(v9ses, st->mode);
+	umode = p9mode2unixmode(v9ses, st, &rdev);
 	/* don't match inode of different type */
 	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
 		return 0;
@@ -473,6 +493,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 				   struct p9_wstat *st,
 				   int new)
 {
+	dev_t rdev;
 	int retval, umode;
 	unsigned long i_ino;
 	struct inode *inode;
@@ -496,8 +517,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 	 * later.
 	 */
 	inode->i_ino = i_ino;
-	umode = p9mode2unixmode(v9ses, st->mode);
-	retval = v9fs_init_inode(v9ses, inode, umode);
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	retval = v9fs_init_inode(v9ses, inode, umode, rdev);
 	if (retval)
 		goto error;
 
@@ -1000,7 +1021,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		return PTR_ERR(st);
 
 	v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-		generic_fillattr(dentry->d_inode, stat);
+	generic_fillattr(dentry->d_inode, stat);
 
 	p9stat_free(st);
 	kfree(st);
@@ -1084,6 +1105,7 @@ void
 v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct super_block *sb)
 {
+	mode_t mode;
 	char ext[32];
 	char tag_name[14];
 	unsigned int i_nlink;
@@ -1119,31 +1141,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 				inode->i_nlink = i_nlink;
 		}
 	}
-	inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
-	if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
-		char type = 0;
-		int major = -1;
-		int minor = -1;
-
-		strncpy(ext, stat->extension, sizeof(ext));
-		sscanf(ext, "%c %u %u", &type, &major, &minor);
-		switch (type) {
-		case 'c':
-			inode->i_mode &= ~S_IFBLK;
-			inode->i_mode |= S_IFCHR;
-			break;
-		case 'b':
-			break;
-		default:
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				"Unknown special type %c %s\n", type,
-				stat->extension);
-		};
-		inode->i_rdev = MKDEV(major, minor);
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-	} else
-		inode->i_rdev = 0;
-
+	mode = stat->mode & S_IALLUGO;
+	mode |= inode->i_mode & ~S_IALLUGO;
+	inode->i_mode = mode;
 	i_size_write(inode, stat->length);
 
 	/* not real number of blocks, but 512 byte ones ... */
@@ -1409,6 +1409,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 
 int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 {
+	int umode;
+	dev_t rdev;
 	loff_t i_size;
 	struct p9_wstat *st;
 	struct v9fs_session_info *v9ses;
@@ -1417,6 +1419,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_stat(fid);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -1428,6 +1436,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	p9stat_free(st);
 	kfree(st);
 	return 0;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0ca224c8bb60..a3f2540cc4b2 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -153,7 +153,8 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 	 * later.
 	 */
 	inode->i_ino = i_ino;
-	retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+	retval = v9fs_init_inode(v9ses, inode,
+				 st->st_mode, new_decode_dev(st->st_rdev));
 	if (retval)
 		goto error;
 
@@ -414,7 +415,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 		 * inode with stat. We need to get an inode
 		 * so that we can set the acl with dentry
 		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -540,6 +541,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 void
 v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 {
+	mode_t mode;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
 	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -552,11 +554,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 		inode->i_uid = stat->st_uid;
 		inode->i_gid = stat->st_gid;
 		inode->i_nlink = stat->st_nlink;
-		inode->i_mode = stat->st_mode;
-		inode->i_rdev = new_decode_dev(stat->st_rdev);
 
-		if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
-			init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		mode = stat->st_mode & S_IALLUGO;
+		mode |= inode->i_mode & ~S_IALLUGO;
+		inode->i_mode = mode;
 
 		i_size_write(inode, stat->st_size);
 		inode->i_blocks = stat->st_blocks;
@@ -664,7 +665,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 		fid = NULL;
 	} else {
 		/* Not in cached mode. No need to populate inode with stat */
-		inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+		inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -820,7 +821,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		 * Not in cached mode. No need to populate inode with stat.
 		 * socket syscall returns a fd, so we need instantiate
 		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, rdev);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -886,6 +887,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -897,6 +903,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	kfree(st);
 	return 0;
 }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index feef6cdc1fd2..c70251d47ed1 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 	else
 		sb->s_d_op = &v9fs_dentry_operations;
 
-	inode = v9fs_get_inode(sb, S_IFDIR | mode);
+	inode = v9fs_get_inode(sb, S_IFDIR | mode, 0);
 	if (IS_ERR(inode)) {
 		retval = PTR_ERR(inode);
 		goto release_sb;

From b49d8b5d7007a673796f3f99688b46931293873e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 17 Aug 2011 16:56:04 +0000
Subject: [PATCH 47/77] net/9p: Fix kernel crash with msize 512K

With msize equal to 512K (PAGE_SIZE * VIRTQUEUE_NUM), we hit multiple
crashes. This patch fix those.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 175b5135bdcf..e317583fcc73 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -263,7 +263,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int in, out, inp, outp;
 	struct virtio_chan *chan = client->trans;
-	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 	unsigned long flags;
 	size_t pdata_off = 0;
 	struct trans_rpage_info *rpinfo = NULL;
@@ -346,7 +345,8 @@ req_retry_pinned:
 		 * Arrange in such a way that server places header in the
 		 * alloced memory and payload onto the user buffer.
 		 */
-		inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+		inp = pack_sg_list(chan->sg, out,
+				   VIRTQUEUE_NUM, req->rc->sdata, 11);
 		/*
 		 * Running executables in the filesystem may result in
 		 * a read request with kernel buffer as opposed to user buffer.
@@ -366,8 +366,8 @@ req_retry_pinned:
 		}
 		in += inp;
 	} else {
-		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
-				req->rc->capacity);
+		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
+				  req->rc->sdata, req->rc->capacity);
 	}
 
 	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
@@ -592,7 +592,14 @@ static struct p9_trans_module p9_virtio_trans = {
 	.close = p9_virtio_close,
 	.request = p9_virtio_request,
 	.cancel = p9_virtio_cancel,
-	.maxsize = PAGE_SIZE*VIRTQUEUE_NUM,
+
+	/*
+	 * We leave one entry for input and one entry for response
+	 * headers. We also skip one more entry to accomodate, address
+	 * that are not at page boundary, that can result in an extra
+	 * page in zero copy.
+	 */
+	.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
 	.pref = P9_TRANS_PREF_PAYLOAD_SEP,
 	.def = 0,
 	.owner = THIS_MODULE,

From f88657ce3f9713a0c62101dffb0e972a979e77b9 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 3 Aug 2011 19:55:32 +0530
Subject: [PATCH 48/77] fs/9p: Add OS dependent open flags in 9p protocol

Some of the flags are OS/arch dependent we add a 9p
protocol value which maps to asm-generic/fcntl.h values in Linux
Based on the original patch from Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/9p/v9fs_vfs.h       |  2 ++
 fs/9p/vfs_file.c       |  2 +-
 fs/9p/vfs_inode.c      | 16 +++++++++++-
 fs/9p/vfs_inode_dotl.c | 55 +++++++++++++++++++++++++++++++++++++++++-
 include/net/9p/9p.h    | 24 ++++++++++++++++++
 5 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 7ac1faec2bde..410ffd6ceb5f 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -83,4 +83,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
 	v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
 	return;
 }
+
+int v9fs_open_to_dotl_flags(int flags);
 #endif
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3c173fcc2c5a..c2f107583125 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 	v9inode = V9FS_I(inode);
 	v9ses = v9fs_inode2v9ses(inode);
 	if (v9fs_proto_dotl(v9ses))
-		omode = file->f_flags;
+		omode = v9fs_open_to_dotl_flags(file->f_flags);
 	else
 		omode = v9fs_uflags2omode(file->f_flags,
 					v9fs_proto_dotu(v9ses));
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 3563cace0a2e..9e3ea6ce6951 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -552,6 +552,19 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	return inode;
 }
 
+/**
+ * v9fs_at_to_dotl_flags- convert Linux specific AT flags to
+ * plan 9 AT flag.
+ * @flags: flags to convert
+ */
+static int v9fs_at_to_dotl_flags(int flags)
+{
+	int rflags = 0;
+	if (flags & AT_REMOVEDIR)
+		rflags |= P9_DOTL_AT_REMOVEDIR;
+	return rflags;
+}
+
 /**
  * v9fs_remove - helper function to remove files and directories
  * @dir: directory inode that is being deleted
@@ -579,7 +592,8 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
 		return retval;
 	}
 	if (v9fs_proto_dotl(v9ses))
-		retval = p9_client_unlinkat(dfid, dentry->d_name.name, flags);
+		retval = p9_client_unlinkat(dfid, dentry->d_name.name,
+					    v9fs_at_to_dotl_flags(flags));
 	if (retval == -EOPNOTSUPP) {
 		/* Try the one based on path */
 		v9fid = v9fs_fid_clone(dentry);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a3f2540cc4b2..aded79fcd5cf 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -191,6 +191,58 @@ v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	return inode;
 }
 
+struct dotl_openflag_map {
+	int open_flag;
+	int dotl_flag;
+};
+
+static int v9fs_mapped_dotl_flags(int flags)
+{
+	int i;
+	int rflags = 0;
+	struct dotl_openflag_map dotl_oflag_map[] = {
+		{ O_CREAT,	P9_DOTL_CREATE },
+		{ O_EXCL,	P9_DOTL_EXCL },
+		{ O_NOCTTY,	P9_DOTL_NOCTTY },
+		{ O_TRUNC,	P9_DOTL_TRUNC },
+		{ O_APPEND,	P9_DOTL_APPEND },
+		{ O_NONBLOCK,	P9_DOTL_NONBLOCK },
+		{ O_DSYNC,	P9_DOTL_DSYNC },
+		{ FASYNC,	P9_DOTL_FASYNC },
+		{ O_DIRECT,	P9_DOTL_DIRECT },
+		{ O_LARGEFILE,	P9_DOTL_LARGEFILE },
+		{ O_DIRECTORY,	P9_DOTL_DIRECTORY },
+		{ O_NOFOLLOW,	P9_DOTL_NOFOLLOW },
+		{ O_NOATIME,	P9_DOTL_NOATIME },
+		{ O_CLOEXEC,	P9_DOTL_CLOEXEC },
+		{ O_SYNC,	P9_DOTL_SYNC},
+	};
+	for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
+		if (flags & dotl_oflag_map[i].open_flag)
+			rflags |= dotl_oflag_map[i].dotl_flag;
+	}
+	return rflags;
+}
+
+/**
+ * v9fs_open_to_dotl_flags- convert Linux specific open flags to
+ * plan 9 open flag.
+ * @flags: flags to convert
+ */
+int v9fs_open_to_dotl_flags(int flags)
+{
+	int rflags = 0;
+
+	/*
+	 * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
+	 * and P9_DOTL_NOACCESS
+	 */
+	rflags |= flags & O_ACCMODE;
+	rflags |= v9fs_mapped_dotl_flags(flags);
+
+	return rflags;
+}
+
 /**
  * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
  * @dir: directory inode that is being created
@@ -259,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			   "Failed to get acl values in creat %d\n", err);
 		goto error;
 	}
-	err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+	err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+				    mode, gid, &qid);
 	if (err < 0) {
 		P9_DPRINTK(P9_DEBUG_VFS,
 				"p9_client_open_dotl failed in creat %d\n",
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 342dcf13d039..957ab99897a1 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -288,6 +288,30 @@ enum p9_perm_t {
 	P9_DMSETVTX = 0x00010000,
 };
 
+/* 9p2000.L open flags */
+#define P9_DOTL_RDONLY        00000000
+#define P9_DOTL_WRONLY        00000001
+#define P9_DOTL_RDWR          00000002
+#define P9_DOTL_NOACCESS      00000003
+#define P9_DOTL_CREATE        00000100
+#define P9_DOTL_EXCL          00000200
+#define P9_DOTL_NOCTTY        00000400
+#define P9_DOTL_TRUNC         00001000
+#define P9_DOTL_APPEND        00002000
+#define P9_DOTL_NONBLOCK      00004000
+#define P9_DOTL_DSYNC         00010000
+#define P9_DOTL_FASYNC        00020000
+#define P9_DOTL_DIRECT        00040000
+#define P9_DOTL_LARGEFILE     00100000
+#define P9_DOTL_DIRECTORY     00200000
+#define P9_DOTL_NOFOLLOW      00400000
+#define P9_DOTL_NOATIME       01000000
+#define P9_DOTL_CLOEXEC       02000000
+#define P9_DOTL_SYNC          04000000
+
+/* 9p2000.L at flags */
+#define P9_DOTL_AT_REMOVEDIR		0x200
+
 /**
  * enum p9_qid_t - QID types
  * @P9_QTDIR: directory

From 73f507171cfa407b19f254aef95cbb058c8180cf Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 16 Aug 2011 22:19:28 +0530
Subject: [PATCH 49/77] fs/9p: Always ask new inode in lookup for cache mode
 disabled

This make sure we don't end up reusing the unlinked inode object.
The ideal way is to use inode i_generation. But i_generation is
not available in userspace always.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/9p/vfs_inode.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9e3ea6ce6951..e3c03db3c788 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -825,6 +825,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 				      struct nameidata *nameidata)
 {
+	struct dentry *res;
 	struct super_block *sb;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *dfid, *fid;
@@ -856,22 +857,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 		return ERR_PTR(result);
 	}
-
-	inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	/*
+	 * Make sure we don't use a wrong inode due to parallel
+	 * unlink. For cached mode create calls request for new
+	 * inode. But with cache disabled, lookup should do this.
+	 */
+	if (v9ses->cache)
+		inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	else
+		inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		result = PTR_ERR(inode);
 		inode = NULL;
 		goto error;
 	}
-
 	result = v9fs_fid_add(dentry, fid);
 	if (result < 0)
 		goto error_iput;
-
 inst_out:
-	d_add(dentry, inode);
-	return NULL;
-
+	/*
+	 * If we had a rename on the server and a parallel lookup
+	 * for the new name, then make sure we instantiate with
+	 * the new name. ie look up for a/b, while on server somebody
+	 * moved b under k and client parallely did a lookup for
+	 * k/b.
+	 */
+	res = d_materialise_unique(dentry, inode);
+	if (!IS_ERR(res))
+		return res;
+	result = PTR_ERR(res);
 error_iput:
 	iput(inode);
 error:

From 51b8b4fb32271d39fbdd760397406177b2b0fd36 Mon Sep 17 00:00:00 2001
From: Jim Garlick <garlick.jim@gmail.com>
Date: Sun, 21 Aug 2011 00:21:18 +0530
Subject: [PATCH 50/77] fs/9p: Use protocol-defined value for lock/getlock
 'type' field.

Signed-off-by: Jim Garlick <garlick@llnl.gov>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/9p/vfs_file.c    | 34 +++++++++++++++++++++++++++-------
 include/net/9p/9p.h |  5 +++++
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2f107583125..62857a810a79 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
 
 	/* convert posix lock to p9 tlock args */
 	memset(&flock, 0, sizeof(flock));
-	flock.type = fl->fl_type;
+	/* map the lock type */
+	switch (fl->fl_type) {
+	case F_RDLCK:
+		flock.type = P9_LOCK_TYPE_RDLCK;
+		break;
+	case F_WRLCK:
+		flock.type = P9_LOCK_TYPE_WRLCK;
+		break;
+	case F_UNLCK:
+		flock.type = P9_LOCK_TYPE_UNLCK;
+		break;
+	}
 	flock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		flock.length = 0;
@@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 
 	/* convert posix lock to p9 tgetlock args */
 	memset(&glock, 0, sizeof(glock));
-	glock.type = fl->fl_type;
+	glock.type  = P9_LOCK_TYPE_UNLCK;
 	glock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		glock.length = 0;
@@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 	res = p9_client_getlock_dotl(fid, &glock);
 	if (res < 0)
 		return res;
-	if (glock.type != F_UNLCK) {
-		fl->fl_type = glock.type;
+	/* map 9p lock type to os lock type */
+	switch (glock.type) {
+	case P9_LOCK_TYPE_RDLCK:
+		fl->fl_type = F_RDLCK;
+		break;
+	case P9_LOCK_TYPE_WRLCK:
+		fl->fl_type = F_WRLCK;
+		break;
+	case P9_LOCK_TYPE_UNLCK:
+		fl->fl_type = F_UNLCK;
+		break;
+	}
+	if (glock.type != P9_LOCK_TYPE_UNLCK) {
 		fl->fl_start = glock.start;
 		if (glock.length == 0)
 			fl->fl_end = OFFSET_MAX;
 		else
 			fl->fl_end = glock.start + glock.length - 1;
 		fl->fl_pid = glock.proc_id;
-	} else
-		fl->fl_type = F_UNLCK;
-
+	}
 	return res;
 }
 
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 957ab99897a1..a6326ef8ade6 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -312,6 +312,11 @@ enum p9_perm_t {
 /* 9p2000.L at flags */
 #define P9_DOTL_AT_REMOVEDIR		0x200
 
+/* 9p2000.L lock type */
+#define P9_LOCK_TYPE_RDLCK 0
+#define P9_LOCK_TYPE_WRLCK 1
+#define P9_LOCK_TYPE_UNLCK 2
+
 /**
  * enum p9_qid_t - QID types
  * @P9_QTDIR: directory

From ff71c182f461da5ae9d2d65f8a63f5a9193b9be1 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <guenter.roeck@ericsson.com>
Date: Sun, 28 Aug 2011 13:01:49 -0700
Subject: [PATCH 51/77] hwmon: (max16065) Fix current calculation

Current calculation is completely wrong. Add missing brackets to fix it.

Signed-off-by: Guenter Roeck <guenter.roeck@ericsson.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Cc: stable@kernel.org # 3.0+
---
 drivers/hwmon/max16065.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c
index d94a24fdf4ba..dd2d7b9620c2 100644
--- a/drivers/hwmon/max16065.c
+++ b/drivers/hwmon/max16065.c
@@ -124,7 +124,7 @@ static inline int MV_TO_LIMIT(int mv, int range)
 
 static inline int ADC_TO_CURR(int adc, int gain)
 {
-	return adc * 1400000 / gain * 255;
+	return adc * 1400000 / (gain * 255);
 }
 
 /*

From d91aae1e52e5289a94f4ddff968decfc8d37271e Mon Sep 17 00:00:00 2001
From: Guenter Roeck <guenter.roeck@ericsson.com>
Date: Mon, 29 Aug 2011 22:53:20 -0700
Subject: [PATCH 52/77] hwmon: (max16065) Add chip access warning to
 documentation

The chips supported by the max16065 driver should not be accessed using direct
i2ctools commands. Add warning to driver documentation to alert users.

Signed-off-by: Guenter Roeck <guenter.roeck@ericsson.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/hwmon/max16065 | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/hwmon/max16065 b/Documentation/hwmon/max16065
index 44b4f61e04f9..c11f64a1f2ad 100644
--- a/Documentation/hwmon/max16065
+++ b/Documentation/hwmon/max16065
@@ -62,6 +62,13 @@ can be safely used to identify the chip. You will have to instantiate
 the devices explicitly. Please see Documentation/i2c/instantiating-devices for
 details.
 
+WARNING: Do not access chip registers using the i2cdump command, and do not use
+any of the i2ctools commands on a command register (0xa5 to 0xac). The chips
+supported by this driver interpret any access to a command register (including
+read commands) as request to execute the command in question. This may result in
+power loss, board resets, and/or Flash corruption. Worst case, your board may
+turn into a brick.
+
 
 Sysfs entries
 -------------

From f020b007d5dd24597f5e985a6309bcb8393797ed Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 31 Aug 2011 11:53:41 -0400
Subject: [PATCH 53/77] hwmon: (ucd9000/ucd9200) Optimize array walk

Rewrite the loop walking the id array during probe. The new code is
better adapted to a null-terminated array, and is also clearer and
more efficient than the original.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Axel Lin <axel.lin@gmail.com>
Cc: Guenter Roeck <guenter.roeck@ericsson.com>
Signed-off-by: Guenter Roeck <guenter.roeck@ericsson.com>
---
 drivers/hwmon/pmbus/ucd9000.c | 6 ++----
 drivers/hwmon/pmbus/ucd9200.c | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index ace1c7319734..d0ddb60155c9 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -141,13 +141,11 @@ static int ucd9000_probe(struct i2c_client *client,
 	block_buffer[ret] = '\0';
 	dev_info(&client->dev, "Device ID %s\n", block_buffer);
 
-	mid = NULL;
-	for (i = 0; i < ARRAY_SIZE(ucd9000_id); i++) {
-		mid = &ucd9000_id[i];
+	for (mid = ucd9000_id; mid->name[0]; mid++) {
 		if (!strncasecmp(mid->name, block_buffer, strlen(mid->name)))
 			break;
 	}
-	if (!mid || !strlen(mid->name)) {
+	if (!mid->name[0]) {
 		dev_err(&client->dev, "Unsupported device\n");
 		return -ENODEV;
 	}
diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c
index ffcc1cf3609d..c65e9da707cc 100644
--- a/drivers/hwmon/pmbus/ucd9200.c
+++ b/drivers/hwmon/pmbus/ucd9200.c
@@ -68,13 +68,11 @@ static int ucd9200_probe(struct i2c_client *client,
 	block_buffer[ret] = '\0';
 	dev_info(&client->dev, "Device ID %s\n", block_buffer);
 
-	mid = NULL;
-	for (i = 0; i < ARRAY_SIZE(ucd9200_id); i++) {
-		mid = &ucd9200_id[i];
+	for (mid = ucd9200_id; mid->name[0]; mid++) {
 		if (!strncasecmp(mid->name, block_buffer, strlen(mid->name)))
 			break;
 	}
-	if (!mid || !strlen(mid->name)) {
+	if (!mid->name[0]) {
 		dev_err(&client->dev, "Unsupported device\n");
 		return -ENODEV;
 	}

From 7a703aded97e01d7f4a6b8440a431117399666ba Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Tue, 30 Aug 2011 14:37:37 +0800
Subject: [PATCH 54/77] i2c-pxa2xx: return proper error code in
 ce4100_i2c_probe error paths

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-pxa-pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c
index 6659d269b841..b73da6cd6f91 100644
--- a/drivers/i2c/busses/i2c-pxa-pci.c
+++ b/drivers/i2c/busses/i2c-pxa-pci.c
@@ -109,12 +109,15 @@ static int __devinit ce4100_i2c_probe(struct pci_dev *dev,
 		return -EINVAL;
 	}
 	sds = kzalloc(sizeof(*sds), GFP_KERNEL);
-	if (!sds)
+	if (!sds) {
+		ret = -ENOMEM;
 		goto err_mem;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) {
 		sds->pdev[i] = add_i2c_device(dev, i);
 		if (IS_ERR(sds->pdev[i])) {
+			ret = PTR_ERR(sds->pdev[i]);
 			while (--i >= 0)
 				platform_device_unregister(sds->pdev[i]);
 			goto err_dev_add;

From 406bd18a7a39ef69f1d60a41d9de74932bcb98d4 Mon Sep 17 00:00:00 2001
From: John Bonesio <bones@secretlab.ca>
Date: Tue, 30 Aug 2011 11:46:08 -0600
Subject: [PATCH 55/77] i2c-tegra: Add of_match_table

This patch was intended to be part of 7ca2d1a105a239e300b937e9c41a10a4bd08f569
"i2c: Tegra: Add DeviceTree support". However, an early version of that patch,
which was missing a chunk, was applied to next-i2c. This change is that
missing chunk.

Signed-off-by: John Bonesio <bones@secretlab.ca>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-tegra.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 2440b7411978..126b4f060231 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -719,6 +719,17 @@ static int tegra_i2c_resume(struct platform_device *pdev)
 }
 #endif
 
+#if defined(CONFIG_OF)
+/* Match table for of_platform binding */
+static const struct of_device_id tegra_i2c_of_match[] __devinitconst = {
+	{ .compatible = "nvidia,tegra20-i2c", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tegra_i2c_of_match);
+#else
+#define tegra_i2c_of_match NULL
+#endif
+
 static struct platform_driver tegra_i2c_driver = {
 	.probe   = tegra_i2c_probe,
 	.remove  = tegra_i2c_remove,
@@ -729,6 +740,7 @@ static struct platform_driver tegra_i2c_driver = {
 	.driver  = {
 		.name  = "tegra-i2c",
 		.owner = THIS_MODULE,
+		.of_match_table = tegra_i2c_of_match,
 	},
 };
 

From 048e29cff95168ea3a9f176e84cc0bae54d0ae64 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 30 Aug 2011 11:46:09 -0600
Subject: [PATCH 56/77] i2c-tegra: add I2C_FUNC_SMBUS_EMUL

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 126b4f060231..17ded1d2f11d 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -531,7 +531,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[],
 
 static u32 tegra_i2c_func(struct i2c_adapter *adap)
 {
-	return I2C_FUNC_I2C;
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
 static const struct i2c_algorithm tegra_i2c_algo = {

From 96219c3a257cc8ba3b3cae67efdc88be37cf7c9d Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Tue, 30 Aug 2011 11:46:10 -0600
Subject: [PATCH 57/77] i2c-tegra: fix possible race condition after tx

In tegra_i2c_fill_tx_fifo, once we have finished pushing all the bytes
to the I2C hardware controller, the interrupt might happen before we
have updated i2c_dev->msg_buf_remaining at the end of the function.
Then, in tegra_i2c_isr, we will call again tegra_i2c_fill_tx_fifo
triggering weird behaviour. This has been shown to happen under real
conditions.

Signed-off-by: Doug Anderson <dianders@chromium.org>
Tested-by: Vincent Palatin <vpalatin@chromium.org>
Acked-by: Rhyland Klein <rklein@nvidia.com>
Acked-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-tegra.c | 44 ++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 17ded1d2f11d..3c94c4a81a55 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -270,14 +270,30 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev)
 
 	/* Rounds down to not include partial word at the end of buf */
 	words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD;
-	if (words_to_transfer > tx_fifo_avail)
-		words_to_transfer = tx_fifo_avail;
 
-	i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer);
+	/* It's very common to have < 4 bytes, so optimize that case. */
+	if (words_to_transfer) {
+		if (words_to_transfer > tx_fifo_avail)
+			words_to_transfer = tx_fifo_avail;
 
-	buf += words_to_transfer * BYTES_PER_FIFO_WORD;
-	buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD;
-	tx_fifo_avail -= words_to_transfer;
+		/*
+		 * Update state before writing to FIFO.  If this casues us
+		 * to finish writing all bytes (AKA buf_remaining goes to 0) we
+		 * have a potential for an interrupt (PACKET_XFER_COMPLETE is
+		 * not maskable).  We need to make sure that the isr sees
+		 * buf_remaining as 0 and doesn't call us back re-entrantly.
+		 */
+		buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD;
+		tx_fifo_avail -= words_to_transfer;
+		i2c_dev->msg_buf_remaining = buf_remaining;
+		i2c_dev->msg_buf = buf +
+			words_to_transfer * BYTES_PER_FIFO_WORD;
+		barrier();
+
+		i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer);
+
+		buf += words_to_transfer * BYTES_PER_FIFO_WORD;
+	}
 
 	/*
 	 * If there is a partial word at the end of buf, handle it manually to
@@ -287,14 +303,15 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev)
 	if (tx_fifo_avail > 0 && buf_remaining > 0) {
 		BUG_ON(buf_remaining > 3);
 		memcpy(&val, buf, buf_remaining);
+
+		/* Again update before writing to FIFO to make sure isr sees. */
+		i2c_dev->msg_buf_remaining = 0;
+		i2c_dev->msg_buf = NULL;
+		barrier();
+
 		i2c_writel(i2c_dev, val, I2C_TX_FIFO);
-		buf_remaining = 0;
-		tx_fifo_avail--;
 	}
 
-	BUG_ON(tx_fifo_avail > 0 && buf_remaining > 0);
-	i2c_dev->msg_buf_remaining = buf_remaining;
-	i2c_dev->msg_buf = buf;
 	return 0;
 }
 
@@ -411,9 +428,10 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id)
 			tegra_i2c_mask_irq(i2c_dev, I2C_INT_TX_FIFO_DATA_REQ);
 	}
 
-	if ((status & I2C_INT_PACKET_XFER_COMPLETE) &&
-			!i2c_dev->msg_buf_remaining)
+	if (status & I2C_INT_PACKET_XFER_COMPLETE) {
+		BUG_ON(i2c_dev->msg_buf_remaining);
 		complete(&i2c_dev->msg_complete);
+	}
 
 	i2c_writel(i2c_dev, status, I2C_INT_STATUS);
 	if (i2c_dev->is_dvc)

From bb9ea77846620ed2b37e74c852d72c7a476b248c Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 6 Sep 2011 08:08:13 +0100
Subject: [PATCH 58/77] ARM: 7081/1: mach-integrator: fix the clocksource

I was intrigued by the fact that the clock stood still on
the Integrator, but it wasn't strange at all, because the
timer was set up all wrong and probably has been for a
while. With this patch the clock starts ticking again:
make the timer periodic (reload), |= on the divisor bit
and load the timer before starting it.

Cc: stable@kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-integrator/integrator_ap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 2fbbdd5eac35..fcf0ae95651f 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -337,15 +337,15 @@ static unsigned long timer_reload;
 static void integrator_clocksource_init(u32 khz)
 {
 	void __iomem *base = (void __iomem *)TIMER2_VA_BASE;
-	u32 ctrl = TIMER_CTRL_ENABLE;
+	u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC;
 
 	if (khz >= 1500) {
 		khz /= 16;
-		ctrl = TIMER_CTRL_DIV16;
+		ctrl |= TIMER_CTRL_DIV16;
 	}
 
-	writel(ctrl, base + TIMER_CTRL);
 	writel(0xffff, base + TIMER_LOAD);
+	writel(ctrl, base + TIMER_CTRL);
 
 	clocksource_mmio_init(base + TIMER_VALUE, "timer2",
 		khz * 1000, 200, 16, clocksource_mmio_readl_down);

From bac7e6ecf60933b68af910eb4c83a775a8b20b19 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 6 Sep 2011 07:45:46 +0100
Subject: [PATCH 59/77] ARM: 7080/1: l2x0: make sure I&D are not locked down on
 init

Fighting unfixed U-Boots and other beasts that may the cache in
a locked-down state when starting the kernel, we make sure to
disable all cache lock-down when initializing the l2x0 so we
are in a known state.

Cc: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Cc: Rabin Vincent <rabin.vincent@stericsson.com>
Cc: Adrian Bunk <adrian.bunk@movial.com>
Cc: Rob Herring <robherring2@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reported-by: Jan Rinze <janrinze@gmail.com>
Tested-by: Robert Marklund <robert.marklund@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/hardware/cache-l2x0.h |  9 +++++++--
 arch/arm/mm/cache-l2x0.c                   | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index bfa706ffd968..99a6ed7e1bfd 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -45,8 +45,13 @@
 #define L2X0_CLEAN_INV_LINE_PA		0x7F0
 #define L2X0_CLEAN_INV_LINE_IDX		0x7F8
 #define L2X0_CLEAN_INV_WAY		0x7FC
-#define L2X0_LOCKDOWN_WAY_D		0x900
-#define L2X0_LOCKDOWN_WAY_I		0x904
+/*
+ * The lockdown registers repeat 8 times for L310, the L210 has only one
+ * D and one I lockdown register at 0x0900 and 0x0904.
+ */
+#define L2X0_LOCKDOWN_WAY_D_BASE	0x900
+#define L2X0_LOCKDOWN_WAY_I_BASE	0x904
+#define L2X0_LOCKDOWN_STRIDE		0x08
 #define L2X0_TEST_OPERATION		0xF00
 #define L2X0_LINE_DATA			0xF10
 #define L2X0_LINE_TAG			0xF30
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 44c086710d2b..9ecfdb511951 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -277,6 +277,25 @@ static void l2x0_disable(void)
 	spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
+static void __init l2x0_unlock(__u32 cache_id)
+{
+	int lockregs;
+	int i;
+
+	if (cache_id == L2X0_CACHE_ID_PART_L310)
+		lockregs = 8;
+	else
+		/* L210 and unknown types */
+		lockregs = 1;
+
+	for (i = 0; i < lockregs; i++) {
+		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+	}
+}
+
 void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 {
 	__u32 aux;
@@ -328,6 +347,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 	 * accessing the below registers will fault.
 	 */
 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		/* Make sure that I&D is not locked down when starting */
+		l2x0_unlock(cache_id);
 
 		/* l2x0 controller is disabled */
 		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);

From dde58cfcc3b6dd2f160ffd355f76ae526155a4df Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Mon, 5 Sep 2011 18:45:28 +0200
Subject: [PATCH 60/77] HID: wacom: Fix error path of power-supply
 initialization

power_supply_unregister() must not be called if power_supply_register() failed.
The wdata->psy.dev pointer may point to invalid memory after a failed
power_supply_register() and hence wacom_remove() will fail while calling
power_supply_unregister().

This changes the wacom_probe function to fail if it cannot register the
power_supply devices. If we would want to keep the previous behaviour we had to
keep some flag about the power_supply state and check it on wacom_remove, but
this seems inappropriate here. Hence, we simply fail, too, if
power_supply_register fails.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-wacom.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c
index 06888323828c..f66a597cff63 100644
--- a/drivers/hid/hid-wacom.c
+++ b/drivers/hid/hid-wacom.c
@@ -353,11 +353,7 @@ static int wacom_probe(struct hid_device *hdev,
 	if (ret) {
 		hid_warn(hdev, "can't create sysfs battery attribute, err: %d\n",
 			 ret);
-		/*
-		 * battery attribute is not critical for the tablet, but if it
-		 * failed then there is no need to create ac attribute
-		 */
-		goto move_on;
+		goto err_battery;
 	}
 
 	wdata->ac.properties = wacom_ac_props;
@@ -371,14 +367,8 @@ static int wacom_probe(struct hid_device *hdev,
 	if (ret) {
 		hid_warn(hdev,
 			 "can't create ac battery attribute, err: %d\n", ret);
-		/*
-		 * ac attribute is not critical for the tablet, but if it
-		 * failed then we don't want to battery attribute to exist
-		 */
-		power_supply_unregister(&wdata->battery);
+		goto err_ac;
 	}
-
-move_on:
 #endif
 	hidinput = list_entry(hdev->inputs.next, struct hid_input, list);
 	input = hidinput->input;
@@ -416,6 +406,13 @@ move_on:
 
 	return 0;
 
+#ifdef CONFIG_HID_WACOM_POWER_SUPPLY
+err_ac:
+	power_supply_unregister(&wdata->battery);
+err_battery:
+	device_remove_file(&hdev->dev, &dev_attr_speed);
+	hid_hw_stop(hdev);
+#endif
 err_free:
 	kfree(wdata);
 	return ret;

From 9086617ea3a7f3e574ca64392b827bdd56f607eb Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Mon, 5 Sep 2011 18:45:29 +0200
Subject: [PATCH 61/77] HID: wacom: Unregister sysfs attributes on remove

HID devices can be hotplugged so we should unregister all sysfs attributes when
removing a driver. Otherwise, manually unloading the wacom-driver will not
remove the sysfs attributes. Only when the device is disconnected, they are
removed, eventually.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-wacom.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c
index f66a597cff63..a597039d0755 100644
--- a/drivers/hid/hid-wacom.c
+++ b/drivers/hid/hid-wacom.c
@@ -423,6 +423,7 @@ static void wacom_remove(struct hid_device *hdev)
 #ifdef CONFIG_HID_WACOM_POWER_SUPPLY
 	struct wacom_data *wdata = hid_get_drvdata(hdev);
 #endif
+	device_remove_file(&hdev->dev, &dev_attr_speed);
 	hid_hw_stop(hdev);
 
 #ifdef CONFIG_HID_WACOM_POWER_SUPPLY

From d2f152878d457a84f3708acee5f682322386a79b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Thu, 8 Sep 2011 10:16:50 -0700
Subject: [PATCH 62/77] wireless: fix kernel-doc warning in net/cfg80211.h

Fix kernel-doc warning in net/cfg80211.h:

  Warning(include/net/cfg80211.h:1884): No description found for parameter 'registered'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/cfg80211.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 408ae4882d22..401d73bd151f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1744,6 +1744,8 @@ struct wiphy_wowlan_support {
  *	by default for perm_addr. In this case, the mask should be set to
  *	all-zeroes. In this case it is assumed that the device can handle
  *	the same number of arbitrary MAC addresses.
+ * @registered: protects ->resume and ->suspend sysfs callbacks against
+ *	unregister hardware
  * @debugfsdir: debugfs directory used for this wiphy, will be renamed
  *	automatically on wiphy renames
  * @dev: (virtual) struct device for this wiphy

From bff747c58cf97bf4fc8b499ee0f419b59d6b226d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Thu, 8 Sep 2011 10:16:47 -0700
Subject: [PATCH 63/77] regulator: fix kernel-doc warning in consumer.h

Fix kernel-doc warning about internal/private data by marking it
as "private:" so that kernel-doc will ignore it.

  Warning(include/linux/regulator/consumer.h:128): No description found for parameter 'ret'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/regulator/consumer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 26f6ea4444e3..b47771aa5718 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -123,7 +123,7 @@ struct regulator_bulk_data {
 	const char *supply;
 	struct regulator *consumer;
 
-	/* Internal use */
+	/* private: Internal use */
 	int ret;
 };
 

From 0ec26fd0698285b31248e34bf1abb022c00f23d6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 5 Sep 2011 18:06:26 +0200
Subject: [PATCH 64/77] vfs: automount should ignore LOOKUP_FOLLOW

Prior to 2.6.38 automount would not trigger on either stat(2) or
lstat(2) on the automount point.

After 2.6.38, with the introduction of the ->d_automount()
infrastructure, stat(2) and others would start triggering automount
while lstat(2), etc. still would not.  This is a regression and a
userspace ABI change.

Problem originally reported here:

  http://thread.gmane.org/gmane.linux.kernel.autofs/6098

It appears that there was an attempt at fixing various userspace tools
to not trigger the automount.  But since the stat system call is
rather common it is impossible to "fix" all userspace.

This patch reverts the original behavior, which is to not trigger on
stat(2) and other symlink following syscalls.

[ It's not really clear what the right behavior is.  Apparently Solaris
  does the "automount on stat, leave alone on lstat".  And some programs
  can get unhappy when "stat+open+fstat" ends up giving a different
  result from the fstat than from the initial stat.

  But the change in 2.6.38 resulted in problems for some people, so
  we're going back to old behavior.  Maybe we can re-visit this
  discussion at some future date  - Linus ]

Reported-by: Leonardo Chiquitto <leonardo.lists@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Ian Kent <raven@themaw.net>
Cc: David Howells <dhowells@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 2826db35dc25..b52bc685465f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -727,25 +727,22 @@ static int follow_automount(struct path *path, unsigned flags,
 	if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
 		return -EISDIR; /* we actually want to stop here */
 
-	/*
-	 * We don't want to mount if someone's just doing a stat and they've
-	 * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
-	 * appended a '/' to the name.
+	/* We don't want to mount if someone's just doing a stat -
+	 * unless they're stat'ing a directory and appended a '/' to
+	 * the name.
+	 *
+	 * We do, however, want to mount if someone wants to open or
+	 * create a file of any type under the mountpoint, wants to
+	 * traverse through the mountpoint or wants to open the
+	 * mounted directory.  Also, autofs may mark negative dentries
+	 * as being automount points.  These will need the attentions
+	 * of the daemon to instantiate them before they can be used.
 	 */
-	if (!(flags & LOOKUP_FOLLOW)) {
-		/* We do, however, want to mount if someone wants to open or
-		 * create a file of any type under the mountpoint, wants to
-		 * traverse through the mountpoint or wants to open the mounted
-		 * directory.
-		 * Also, autofs may mark negative dentries as being automount
-		 * points.  These will need the attentions of the daemon to
-		 * instantiate them before they can be used.
-		 */
-		if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-			     LOOKUP_OPEN | LOOKUP_CREATE)) &&
-		    path->dentry->d_inode)
-			return -EISDIR;
-	}
+	if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
+		     LOOKUP_OPEN | LOOKUP_CREATE)) &&
+	    path->dentry->d_inode)
+		return -EISDIR;
+
 	current->total_link_count++;
 	if (current->total_link_count >= 40)
 		return -ELOOP;

From 5307f6d5fb12fd01f9f321bc4a8fd77e74858647 Mon Sep 17 00:00:00 2001
From: Shyam Iyer <shyam.iyer.t@gmail.com>
Date: Thu, 8 Sep 2011 16:41:17 -0500
Subject: [PATCH 65/77] Fix pointer dereference before call to
 pcie_bus_configure_settings

Commit b03e7495a862 ("PCI: Set PCI-E Max Payload Size on fabric")
introduced a potential NULL pointer dereference in calls to
pcie_bus_configure_settings due to attempts to access pci_bus self
variables when the self pointer is NULL.

To correct this, verify that the self pointer in pci_bus is non-NULL
before dereferencing it.

Reported-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Shyam Iyer <shyam_iyer@dell.com>
Signed-off-by: Jon Mason <mason@myri.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/pci/acpi.c              | 9 +++++++--
 drivers/pci/hotplug/pcihp_slot.c | 4 +++-
 drivers/pci/probe.c              | 3 ---
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index c95330267f08..039d91315bc5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -365,8 +365,13 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
 	 */
 	if (bus) {
 		struct pci_bus *child;
-		list_for_each_entry(child, &bus->children, node)
-			pcie_bus_configure_settings(child, child->self->pcie_mpss);
+		list_for_each_entry(child, &bus->children, node) {
+			struct pci_dev *self = child->self;
+			if (!self)
+				continue;
+
+			pcie_bus_configure_settings(child, self->pcie_mpss);
+		}
 	}
 
 	if (!bus)
diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c
index 753b21aaea61..3ffd9c1acc0a 100644
--- a/drivers/pci/hotplug/pcihp_slot.c
+++ b/drivers/pci/hotplug/pcihp_slot.c
@@ -169,7 +169,9 @@ void pci_configure_slot(struct pci_dev *dev)
 			(dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)))
 		return;
 
-	pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss);
+	if (dev->bus && dev->bus->self)
+		pcie_bus_configure_settings(dev->bus,
+					    dev->bus->self->pcie_mpss);
 
 	memset(&hpp, 0, sizeof(hpp));
 	ret = pci_get_hp_params(dev, &hpp);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8473727b29fa..0820fc1544e8 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1456,9 +1456,6 @@ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss)
 {
 	u8 smpss = mpss;
 
-	if (!bus->self)
-		return;
-
 	if (!pci_is_pcie(bus->self))
 		return;
 

From ed2888e906b56769b4ffabb9c577190438aa68b8 Mon Sep 17 00:00:00 2001
From: Jon Mason <mason@myri.com>
Date: Thu, 8 Sep 2011 16:41:18 -0500
Subject: [PATCH 66/77] PCI: Remove MRRS modification from MPS setting code

Modifying the Maximum Read Request Size to 0 (value of 128Bytes) has
massive negative ramifications on some devices.  Without knowing which
devices have this issue, do not modify from the default value when
walking the PCI-E bus in pcie_bus_safe mode.  Also, make pcie_bus_safe
the default procedure.

Tested-by: Sven Schnelle <svens@stackframe.org>
Tested-by: Simon Kirby <sim@hostway.ca>
Tested-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Reported-and-tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-and-tested-by: Niels Ole Salscheider <niels_ole@salscheider-online.de>
References: https://bugzilla.kernel.org/show_bug.cgi?id=42162
Signed-off-by: Jon Mason <mason@myri.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/pci.c   |  2 +-
 drivers/pci/probe.c | 41 ++++++++++++++++++++++-------------------
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0ce67423a0a3..4e84fd4a4312 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
 unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
 unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
 
-enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE;
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE;
 
 /*
  * The default CLS is used if arch didn't set CLS explicitly and not
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 0820fc1544e8..b1187ff31d89 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1396,34 +1396,37 @@ static void pcie_write_mps(struct pci_dev *dev, int mps)
 
 static void pcie_write_mrrs(struct pci_dev *dev, int mps)
 {
-	int rc, mrrs;
+	int rc, mrrs, dev_mpss;
 
-	if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
-		int dev_mpss = 128 << dev->pcie_mpss;
+	/* In the "safe" case, do not configure the MRRS.  There appear to be
+	 * issues with setting MRRS to 0 on a number of devices.
+	 */
 
-		/* For Max performance, the MRRS must be set to the largest
-		 * supported value.  However, it cannot be configured larger
-		 * than the MPS the device or the bus can support.  This assumes
-		 * that the largest MRRS available on the device cannot be
-		 * smaller than the device MPSS.
-		 */
-		mrrs = mps < dev_mpss ? mps : dev_mpss;
-	} else
-		/* In the "safe" case, configure the MRRS for fairness on the
-		 * bus by making all devices have the same size
-		 */
-		mrrs = mps;
+	if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
+		return;
 
+	dev_mpss = 128 << dev->pcie_mpss;
+
+	/* For Max performance, the MRRS must be set to the largest supported
+	 * value.  However, it cannot be configured larger than the MPS the
+	 * device or the bus can support.  This assumes that the largest MRRS
+	 * available on the device cannot be smaller than the device MPSS.
+	 */
+	mrrs = min(mps, dev_mpss);
 
 	/* MRRS is a R/W register.  Invalid values can be written, but a
-	 * subsiquent read will verify if the value is acceptable or not.
+	 * subsequent read will verify if the value is acceptable or not.
 	 * If the MRRS value provided is not acceptable (e.g., too large),
 	 * shrink the value until it is acceptable to the HW.
  	 */
 	while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
+		dev_warn(&dev->dev, "Attempting to modify the PCI-E MRRS value"
+			 " to %d.  If any issues are encountered, please try "
+			 "running with pci=pcie_bus_safe\n", mrrs);
 		rc = pcie_set_readrq(dev, mrrs);
 		if (rc)
-			dev_err(&dev->dev, "Failed attempting to set the MRRS\n");
+			dev_err(&dev->dev,
+				"Failed attempting to set the MRRS\n");
 
 		mrrs /= 2;
 	}
@@ -1436,13 +1439,13 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
 	if (!pci_is_pcie(dev))
 		return 0;
 
-	dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
+	dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
 		 pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
 
 	pcie_write_mps(dev, mps);
 	pcie_write_mrrs(dev, mps);
 
-	dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
+	dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
 		 pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
 
 	return 0;

From 94007751bb02797ba87bac7aacee2731ac2039a3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 10 Sep 2011 17:20:21 +1000
Subject: [PATCH 67/77] Avoid dereferencing a 'request_queue' after last close.

On the last close of an 'md' device which as been stopped, the device
is destroyed and in particular the request_queue is freed.  The free
is done in a separate thread so it might happen a short time later.

__blkdev_put calls bdev_inode_switch_bdi *after* ->release has been
called.

Since commit f758eeabeb96f878c860e8f110f94ec8820822a9
bdev_inode_switch_bdi will dereference the 'old' bdi, which lives
inside a request_queue, to get a spin lock.  This causes the last
close on an md device to sometime take a spin_lock which lives in
freed memory - which results in an oops.

So move the called to bdev_inode_switch_bdi before the call to
->release.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: stable@kernel.org
Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/block_dev.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index ff77262e887c..95f786ec7f08 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1429,6 +1429,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		WARN_ON_ONCE(bdev->bd_holders);
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
+		/* ->release can cause the old bdi to disappear,
+		 * so must switch it out first
+		 */
+		bdev_inode_switch_bdi(bdev->bd_inode,
+					&default_backing_dev_info);
 	}
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
@@ -1442,8 +1447,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		disk_put_part(bdev->bd_part);
 		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
-		bdev_inode_switch_bdi(bdev->bd_inode,
-					&default_backing_dev_info);
 		if (bdev != bdev->bd_contains)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;

From 19d5f834d6aff7efb1c9353523865c5bce869470 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 10 Sep 2011 17:21:17 +1000
Subject: [PATCH 68/77] md/raid10: unify handling of write completion.

A write can complete at two different places:
1/ when the last member-device write completes, through
   raid10_end_write_request
2/ in make_request() when we remove the initial bias from ->remaining.

These two should do exactly the same thing and the comment says they
do, but they don't.

So factor the correct code out into a function and call it in both
places.  This makes the code much more similar to RAID1.

The difference is only significant if there is an error, and they
usually take a while, so it is unlikely that there will be an error
already when make_request is completing, so this is unlikely to cause
real problems.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid10.c | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8b29cd4f01c8..f6873fc8e5ee 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -337,6 +337,21 @@ static void close_write(r10bio_t *r10_bio)
 	md_write_end(r10_bio->mddev);
 }
 
+static void one_write_done(r10bio_t *r10_bio)
+{
+	if (atomic_dec_and_test(&r10_bio->remaining)) {
+		if (test_bit(R10BIO_WriteError, &r10_bio->state))
+			reschedule_retry(r10_bio);
+		else {
+			close_write(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				raid_end_bio_io(r10_bio);
+		}
+	}
+}
+
 static void raid10_end_write_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -387,17 +402,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		if (test_bit(R10BIO_WriteError, &r10_bio->state))
-			reschedule_retry(r10_bio);
-		else {
-			close_write(r10_bio);
-			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
-				reschedule_retry(r10_bio);
-			else
-				raid_end_bio_io(r10_bio);
-		}
-	}
+	one_write_done(r10_bio);
 	if (dec_rdev)
 		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
@@ -1127,15 +1132,8 @@ retry_write:
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		/* This matches the end of raid10_end_write_request() */
-		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
-				r10_bio->sectors,
-				!test_bit(R10BIO_Degraded, &r10_bio->state),
-				0);
-		md_write_end(mddev);
-		raid_end_bio_io(r10_bio);
-	}
+	/* Remove the bias on 'remaining' */
+	one_write_done(r10_bio);
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);

From 079fa166a2874985ae58b2e21e26e1cbc91127d4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 10 Sep 2011 17:21:23 +1000
Subject: [PATCH 69/77] md/raid1,10: Remove use-after-free bug in make_request.

A single request to RAID1 or RAID10 might result in multiple
requests if there are known bad blocks that need to be avoided.

To detect if we need to submit another write request we test:
 	if (sectors_handled < (bio->bi_size >> 9)) {

However this is after we call **_write_done() so the 'bio' no longer
belongs to us - the writes could have completed and the bio freed.

So move the **_write_done call until after the test against
bio->bi_size.

This addresses https://bugzilla.kernel.org/show_bug.cgi?id=41862

Reported-by: Bruno Wolff III <bruno@wolff.to>
Tested-by: Bruno Wolff III <bruno@wolff.to>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid1.c  | 14 +++++++++-----
 drivers/md/raid10.c | 13 ++++++++-----
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 32323f0afd89..f4622dd8fc59 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1099,12 +1099,11 @@ read_again:
 		bio_list_add(&conf->pending_bio_list, mbio);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
-	r1_bio_write_done(r1_bio);
-
-	/* In case raid1d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
-
+	/* Mustn't call r1_bio_write_done before this next test,
+	 * as it could result in the bio being freed.
+	 */
 	if (sectors_handled < (bio->bi_size >> 9)) {
+		r1_bio_write_done(r1_bio);
 		/* We need another r1_bio.  It has already been counted
 		 * in bio->bi_phys_segments
 		 */
@@ -1117,6 +1116,11 @@ read_again:
 		goto retry_write;
 	}
 
+	r1_bio_write_done(r1_bio);
+
+	/* In case raid1d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
+
 	if (do_sync || !bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f6873fc8e5ee..d7a8468ddeab 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1132,13 +1132,12 @@ retry_write:
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
-	/* Remove the bias on 'remaining' */
-	one_write_done(r10_bio);
-
-	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	/* Don't remove the bias on 'remaining' (one_write_done) until
+	 * after checking if we need to go around again.
+	 */
 
 	if (sectors_handled < (bio->bi_size >> 9)) {
+		one_write_done(r10_bio);
 		/* We need another r10_bio.  It has already been counted
 		 * in bio->bi_phys_segments.
 		 */
@@ -1152,6 +1151,10 @@ retry_write:
 		r10_bio->state = 0;
 		goto retry_write;
 	}
+	one_write_done(r10_bio);
+
+	/* In case raid10d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
 
 	if (do_sync || !mddev->bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);

From 27a7b260f71439c40546b43588448faac01adb93 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 10 Sep 2011 17:21:28 +1000
Subject: [PATCH 70/77] md: Fix handling for devices from 2TB to 4TB in 0.90
 metadata.

0.90 metadata uses an unsigned 32bit number to count the number of
kilobytes used from each device.
This should allow up to 4TB per device.
However we multiply this by 2 (to get sectors) before casting to a
larger type, so sizes above 2TB get truncated.

Also we allow rdev->sectors to be larger than 4TB, so it is possible
for the array to be resized larger than the metadata can handle.
So make sure rdev->sectors never exceeds 4TB when 0.90 metadata is in
used.

Also the sanity check at the end of super_90_load should include level
1 as it used ->size too. (RAID0 and Linear don't use ->size at all).

Reported-by: Pim Zandbergen <P.Zandbergen@macroscoop.nl>
Cc: stable@kernel.org
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/md.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3742ce8b0acf..5404b2295820 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1138,8 +1138,11 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 			ret = 0;
 	}
 	rdev->sectors = rdev->sb_start;
+	/* Limit to 4TB as metadata cannot record more than that */
+	if (rdev->sectors >= (2ULL << 32))
+		rdev->sectors = (2ULL << 32) - 2;
 
-	if (rdev->sectors < sb->size * 2 && sb->level > 1)
+	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
 		/* "this cannot possibly happen" ... */
 		ret = -EINVAL;
 
@@ -1173,7 +1176,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
-		mddev->dev_sectors = sb->size * 2;
+		mddev->dev_sectors = ((sector_t)sb->size) * 2;
 		mddev->events = ev1;
 		mddev->bitmap_info.offset = 0;
 		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
@@ -1415,6 +1418,11 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 	rdev->sb_start = calc_dev_sboffset(rdev);
 	if (!num_sectors || num_sectors > rdev->sb_start)
 		num_sectors = rdev->sb_start;
+	/* Limit to 4TB as metadata cannot record more than that.
+	 * 4TB == 2^32 KB, or 2*2^32 sectors.
+	 */
+	if (num_sectors >= (2ULL << 32))
+		num_sectors = (2ULL << 32) - 2;
 	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
 		       rdev->sb_page);
 	md_super_wait(rdev->mddev);

From c338bfb5ecf6c36b2112479691d69db4c2b5a78a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Sat, 10 Sep 2011 20:13:01 +0200
Subject: [PATCH 71/77] backlight: Declare backlight_types[] const

Since backlight_types[] isn't modified, let's declare it const.  That
was probably the intention of the author of commit bb7ca747f8d6
("backlight: add backlight type"), via which the "const char const *"
construct was introduced.  The duplicate const was detected by sparse.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/backlight.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 80d292fb92d8..7363c1b169e8 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -19,7 +19,7 @@
 #include <asm/backlight.h>
 #endif
 
-static const char const *backlight_types[] = {
+static const char *const backlight_types[] = {
 	[BACKLIGHT_RAW] = "raw",
 	[BACKLIGHT_PLATFORM] = "platform",
 	[BACKLIGHT_FIRMWARE] = "firmware",

From 6c6d8deb5d95a0675a8edd588bbc2249cbce5b34 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jkrzyszt@tis.icnet.pl>
Date: Thu, 8 Sep 2011 18:45:40 +0100
Subject: [PATCH 72/77] ARM: 7088/1: entry: fix wrong parameter name used in
 do_thumb_abort

Commit be020f8618ca, "ARM: entry: abort-macro: specify registers to be
used for macros", while replacing register numbers with macro parameter
names, mismatched the name used for r1. For me, this resulted in user
space built for EABI with -march=armv4t -mtune=arm920t -mthumb-interwork
-mthumb broken on my OMAP1510 based Amstrad Delta (old ABI and no thumb
still worked for me though).

Fix this by using correct parameter name fsr instead of mismatched psr,
used by callers for another purpose.

Signed-off-by: Janusz Krzysztofik <jkrzyszt@tis.icnet.pl>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/abort-macro.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S
index 52162d59407a..2cbf68ef0e83 100644
--- a/arch/arm/mm/abort-macro.S
+++ b/arch/arm/mm/abort-macro.S
@@ -17,7 +17,7 @@
 	cmp	\tmp, # 0x5600			@ Is it ldrsb?
 	orreq	\tmp, \tmp, #1 << 11		@ Set L-bit if yes
 	tst	\tmp, #1 << 11			@ L = 0 -> write
-	orreq	\psr, \psr, #1 << 11		@ yes.
+	orreq	\fsr, \fsr, #1 << 11		@ yes.
 	b	do_DataAbort
 not_thumb:
 	.endm

From 8c369264b6de3b2ab796f330a4d85770a6b8b033 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Wed, 3 Aug 2011 18:12:05 +0100
Subject: [PATCH 73/77] ARM: 7009/1: l2x0: Add OF based initialization

This adds probing for ARM L2x0 cache controllers via device tree. Support
includes the L210, L220, and PL310 controllers. The binding allows setting
up cache RAM latencies and filter addresses (PL310 only).

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Barry Song <21cnbao@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 .../devicetree/bindings/arm/l2cc.txt          |  42 +++++++
 arch/arm/include/asm/hardware/cache-l2x0.h    |  17 +++
 arch/arm/mm/cache-l2x0.c                      | 103 ++++++++++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/l2cc.txt

diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt
new file mode 100644
index 000000000000..f50e021a0998
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/l2cc.txt
@@ -0,0 +1,42 @@
+* ARM L2 Cache Controller
+
+ARM cores often have a separate level 2 cache controller. There are various
+implementations of the L2 cache controller with compatible programming models.
+The ARM L2 cache representation in the device tree should be done as follows:
+
+Required properties:
+
+- compatible : should be one of:
+	"arm,pl310-cache"
+	"arm,l220-cache"
+	"arm,l210-cache"
+- cache-unified : Specifies the cache is a unified cache.
+- cache-level : Should be set to 2 for a level 2 cache.
+- reg : Physical base address and size of cache controller's memory mapped
+  registers.
+
+Optional properties:
+
+- arm,data-latency : Cycles of latency for Data RAM accesses. Specifies 3 cells of
+  read, write and setup latencies. Minimum valid values are 1. Controllers
+  without setup latency control should use a value of 0.
+- arm,tag-latency : Cycles of latency for Tag RAM accesses. Specifies 3 cells of
+  read, write and setup latencies. Controllers without setup latency control
+  should use 0. Controllers without separate read and write Tag RAM latency
+  values should only use the first cell.
+- arm,dirty-latency : Cycles of latency for Dirty RAMs. This is a single cell.
+- arm,filter-ranges : <start length> Starting address and length of window to
+  filter. Addresses in the filter window are directed to the M1 port. Other
+  addresses will go to the M0 port.
+
+Example:
+
+L2: cache-controller {
+        compatible = "arm,pl310-cache";
+        reg = <0xfff12000 0x1000>;
+        arm,data-latency = <1 1 1>;
+        arm,tag-latency = <2 2 2>;
+        arm,filter-latency = <0x80000000 0x8000000>;
+        cache-unified;
+        cache-level = <2>;
+};
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 99a6ed7e1bfd..c48cb1e1c46c 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -52,6 +52,8 @@
 #define L2X0_LOCKDOWN_WAY_D_BASE	0x900
 #define L2X0_LOCKDOWN_WAY_I_BASE	0x904
 #define L2X0_LOCKDOWN_STRIDE		0x08
+#define L2X0_ADDR_FILTER_START		0xC00
+#define L2X0_ADDR_FILTER_END		0xC04
 #define L2X0_TEST_OPERATION		0xF00
 #define L2X0_LINE_DATA			0xF10
 #define L2X0_LINE_TAG			0xF30
@@ -67,6 +69,14 @@
 #define L2X0_CACHE_ID_PART_L310		(3 << 6)
 
 #define L2X0_AUX_CTRL_MASK			0xc0000fff
+#define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT	0
+#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK	0x7
+#define L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT	3
+#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK	(0x7 << 3)
+#define L2X0_AUX_CTRL_TAG_LATENCY_SHIFT		6
+#define L2X0_AUX_CTRL_TAG_LATENCY_MASK		(0x7 << 6)
+#define L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT	9
+#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK	(0x7 << 9)
 #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT	16
 #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT		17
 #define L2X0_AUX_CTRL_WAY_SIZE_MASK		(0x7 << 17)
@@ -77,8 +87,15 @@
 #define L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT	29
 #define L2X0_AUX_CTRL_EARLY_BRESP_SHIFT		30
 
+#define L2X0_LATENCY_CTRL_SETUP_SHIFT	0
+#define L2X0_LATENCY_CTRL_RD_SHIFT	4
+#define L2X0_LATENCY_CTRL_WR_SHIFT	8
+
+#define L2X0_ADDR_FILTER_EN		1
+
 #ifndef __ASSEMBLY__
 extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask);
+extern int l2x0_of_init(__u32 aux_val, __u32 aux_mask);
 #endif
 
 #endif
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 9ecfdb511951..db4484f5bf98 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -16,9 +16,12 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -372,3 +375,103 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
 			ways, cache_id, aux, l2x0_size);
 }
+
+#ifdef CONFIG_OF
+static void __init l2x0_of_setup(const struct device_node *np,
+				 __u32 *aux_val, __u32 *aux_mask)
+{
+	u32 data[2] = { 0, 0 };
+	u32 tag = 0;
+	u32 dirty = 0;
+	u32 val = 0, mask = 0;
+
+	of_property_read_u32(np, "arm,tag-latency", &tag);
+	if (tag) {
+		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
+		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
+	}
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1]) {
+		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
+			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
+		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
+		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
+	}
+
+	of_property_read_u32(np, "arm,dirty-latency", &dirty);
+	if (dirty) {
+		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
+		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
+	}
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
+static void __init pl310_of_setup(const struct device_node *np,
+				  __u32 *aux_val, __u32 *aux_mask)
+{
+	u32 data[3] = { 0, 0, 0 };
+	u32 tag[3] = { 0, 0, 0 };
+	u32 filter[2] = { 0, 0 };
+
+	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
+	if (tag[0] && tag[1] && tag[2])
+		writel_relaxed(
+			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
+			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
+			((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
+			l2x0_base + L2X0_TAG_LATENCY_CTRL);
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1] && data[2])
+		writel_relaxed(
+			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
+			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
+			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
+			l2x0_base + L2X0_DATA_LATENCY_CTRL);
+
+	of_property_read_u32_array(np, "arm,filter-ranges",
+				   filter, ARRAY_SIZE(filter));
+	if (filter[0] && filter[1]) {
+		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
+			       l2x0_base + L2X0_ADDR_FILTER_END);
+		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
+			       l2x0_base + L2X0_ADDR_FILTER_START);
+	}
+}
+
+static const struct of_device_id l2x0_ids[] __initconst = {
+	{ .compatible = "arm,pl310-cache", .data = pl310_of_setup },
+	{ .compatible = "arm,l220-cache", .data = l2x0_of_setup },
+	{ .compatible = "arm,l210-cache", .data = l2x0_of_setup },
+	{}
+};
+
+int __init l2x0_of_init(__u32 aux_val, __u32 aux_mask)
+{
+	struct device_node *np;
+	void (*l2_setup)(const struct device_node *np,
+		__u32 *aux_val, __u32 *aux_mask);
+
+	np = of_find_matching_node(NULL, l2x0_ids);
+	if (!np)
+		return -ENODEV;
+	l2x0_base = of_iomap(np, 0);
+	if (!l2x0_base)
+		return -ENOMEM;
+
+	/* L2 configuration can only be changed if the cache is disabled */
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		l2_setup = of_match_node(l2x0_ids, np)->data;
+		if (l2_setup)
+			l2_setup(np, &aux_val, &aux_mask);
+	}
+	l2x0_init(l2x0_base, aux_val, aux_mask);
+	return 0;
+}
+#endif

From 1caf30924f71ae16a26aa59b02a6401f467bf1c8 Mon Sep 17 00:00:00 2001
From: Barry Song <21cnbao@gmail.com>
Date: Fri, 9 Sep 2011 10:30:34 +0100
Subject: [PATCH 74/77] ARM: 7089/1: L2X0: add explicit cpu_relax() for busy
 wait loop

using cpu_relax in busy loops is a well-known idiom in the kernel.
It's more for documentation purposes than technically needed here.

Signed-off-by: Barry Song <Baohua.Song@csr.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Jamie Iles <jamie@jamieiles.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index db4484f5bf98..a78044885ed5 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -37,7 +37,7 @@ static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
 {
 	/* wait for cache operation by line or way to complete */
 	while (readl_relaxed(reg) & mask)
-		;
+		cpu_relax();
 }
 
 #ifdef CONFIG_CACHE_PL310

From 74d41f39a9c161cd0434bb13d929d75fc7be75bd Mon Sep 17 00:00:00 2001
From: Barry Song <21cnbao@gmail.com>
Date: Wed, 14 Sep 2011 03:20:01 +0100
Subject: [PATCH 75/77] ARM: 7090/1: CACHE-L2X0: filter start address can be 0
 and is often 0

this patch fixes the error in Rob Herring's
ARM: 7009/1: l2x0: Add OF based initialization
http://www.spinics.net/lists/arm-kernel/msg131123.html
it has been in rmk/for-next with commit 41c86ff5b

Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Barry Song <Baohua.Song@csr.com>
Acked-by: Rob Herring <robherring2@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index a78044885ed5..0d85d221d7b0 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -437,7 +437,7 @@ static void __init pl310_of_setup(const struct device_node *np,
 
 	of_property_read_u32_array(np, "arm,filter-ranges",
 				   filter, ARRAY_SIZE(filter));
-	if (filter[0] && filter[1]) {
+	if (filter[1]) {
 		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
 			       l2x0_base + L2X0_ADDR_FILTER_END);
 		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,

From 8d4e652d1b2539196efaef051956fa29e22e9c10 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 17 Aug 2011 18:03:17 +0100
Subject: [PATCH 76/77] ARM: 7023/1: L2x0: Add interrupts property to OF
 binding

Following the discussion here:
http://lists.ozlabs.org/pipermail/devicetree-discuss/2011-August/007301.html

The L2x0 L2 Cache Controllers support a combined interrupt line
which can be used for several events (e.g. read/write/parity errors on
tag/data RAM, event counter increment/overflow). Unfortunately the
OF binding added in c519ecf2 ("ARM: 7009/1: l2x0: Add OF based
initialization") does not represent the interrupt.

This patch adds an "interrupts" property to the L2x0 OF binding,
representing the combined interrupt line.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Olof Johansson <olof@lixom.net>
Cc: Barry Song <21cnbao@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/devicetree/bindings/arm/l2cc.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt
index f50e021a0998..7ca52161e7ab 100644
--- a/Documentation/devicetree/bindings/arm/l2cc.txt
+++ b/Documentation/devicetree/bindings/arm/l2cc.txt
@@ -28,6 +28,7 @@ Optional properties:
 - arm,filter-ranges : <start length> Starting address and length of window to
   filter. Addresses in the filter window are directed to the M1 port. Other
   addresses will go to the M0 port.
+- interrupts : 1 combined interrupt.
 
 Example:
 
@@ -39,4 +40,5 @@ L2: cache-controller {
         arm,filter-latency = <0x80000000 0x8000000>;
         cache-unified;
         cache-level = <2>;
+	interrupts = <45>;
 };

From 91c2ebb90b1890abc648ba9dec5608cbc97e1cb9 Mon Sep 17 00:00:00 2001
From: Barry Song <Baohua.Song@csr.com>
Date: Fri, 30 Sep 2011 14:43:12 +0100
Subject: [PATCH 77/77] ARM: 7114/1: cache-l2x0: add resume entry for l2 in
 secure mode

we save the l2x0 registers at the first initialization, and platform codes
can get them to restore l2x0 status after wakeup.

Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Barry Song <Baohua.Song@csr.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/hardware/cache-l2x0.h |  25 ++++
 arch/arm/include/asm/outercache.h          |   7 ++
 arch/arm/kernel/asm-offsets.c              |  12 ++
 arch/arm/mm/cache-l2x0.c                   | 129 +++++++++++++++++++--
 4 files changed, 163 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index c48cb1e1c46c..434edccdf7f3 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -67,6 +67,13 @@
 #define L2X0_CACHE_ID_PART_MASK		(0xf << 6)
 #define L2X0_CACHE_ID_PART_L210		(1 << 6)
 #define L2X0_CACHE_ID_PART_L310		(3 << 6)
+#define L2X0_CACHE_ID_RTL_MASK          0x3f
+#define L2X0_CACHE_ID_RTL_R0P0          0x0
+#define L2X0_CACHE_ID_RTL_R1P0          0x2
+#define L2X0_CACHE_ID_RTL_R2P0          0x4
+#define L2X0_CACHE_ID_RTL_R3P0          0x5
+#define L2X0_CACHE_ID_RTL_R3P1          0x6
+#define L2X0_CACHE_ID_RTL_R3P2          0x8
 
 #define L2X0_AUX_CTRL_MASK			0xc0000fff
 #define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT	0
@@ -96,6 +103,24 @@
 #ifndef __ASSEMBLY__
 extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask);
 extern int l2x0_of_init(__u32 aux_val, __u32 aux_mask);
+
+struct l2x0_regs {
+	unsigned long phy_base;
+	unsigned long aux_ctrl;
+	/*
+	 * Whether the following registers need to be saved/restored
+	 * depends on platform
+	 */
+	unsigned long tag_latency;
+	unsigned long data_latency;
+	unsigned long filter_start;
+	unsigned long filter_end;
+	unsigned long prefetch_ctrl;
+	unsigned long pwr_ctrl;
+};
+
+extern struct l2x0_regs l2x0_saved_regs;
+
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index d8387437ec5a..53426c66352a 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -34,6 +34,7 @@ struct outer_cache_fns {
 	void (*sync)(void);
 #endif
 	void (*set_debug)(unsigned long);
+	void (*resume)(void);
 };
 
 #ifdef CONFIG_OUTER_CACHE
@@ -74,6 +75,12 @@ static inline void outer_disable(void)
 		outer_cache.disable();
 }
 
+static inline void outer_resume(void)
+{
+	if (outer_cache.resume)
+		outer_cache.resume();
+}
+
 #else
 
 static inline void outer_inv_range(phys_addr_t start, phys_addr_t end)
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 16baba2e4369..1429d8989fb9 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -20,6 +20,7 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/procinfo.h>
+#include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
 
 /*
@@ -92,6 +93,17 @@ int main(void)
   DEFINE(S_OLD_R0,		offsetof(struct pt_regs, ARM_ORIG_r0));
   DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
   BLANK();
+#ifdef CONFIG_CACHE_L2X0
+  DEFINE(L2X0_R_PHY_BASE,	offsetof(struct l2x0_regs, phy_base));
+  DEFINE(L2X0_R_AUX_CTRL,	offsetof(struct l2x0_regs, aux_ctrl));
+  DEFINE(L2X0_R_TAG_LATENCY,	offsetof(struct l2x0_regs, tag_latency));
+  DEFINE(L2X0_R_DATA_LATENCY,	offsetof(struct l2x0_regs, data_latency));
+  DEFINE(L2X0_R_FILTER_START,	offsetof(struct l2x0_regs, filter_start));
+  DEFINE(L2X0_R_FILTER_END,	offsetof(struct l2x0_regs, filter_end));
+  DEFINE(L2X0_R_PREFETCH_CTRL,	offsetof(struct l2x0_regs, prefetch_ctrl));
+  DEFINE(L2X0_R_PWR_CTRL,	offsetof(struct l2x0_regs, pwr_ctrl));
+  BLANK();
+#endif
 #ifdef CONFIG_CPU_HAS_ASID
   DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
   BLANK();
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 0d85d221d7b0..3f9b9980478e 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -33,6 +33,14 @@ static DEFINE_SPINLOCK(l2x0_lock);
 static uint32_t l2x0_way_mask;	/* Bitmask of active ways */
 static uint32_t l2x0_size;
 
+struct l2x0_regs l2x0_saved_regs;
+
+struct l2x0_of_data {
+	void (*setup)(const struct device_node *, __u32 *, __u32 *);
+	void (*save)(void);
+	void (*resume)(void);
+};
+
 static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
 {
 	/* wait for cache operation by line or way to complete */
@@ -280,7 +288,7 @@ static void l2x0_disable(void)
 	spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-static void __init l2x0_unlock(__u32 cache_id)
+static void l2x0_unlock(__u32 cache_id)
 {
 	int lockregs;
 	int i;
@@ -356,6 +364,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 		/* l2x0 controller is disabled */
 		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
 
+		l2x0_saved_regs.aux_ctrl = aux;
+
 		l2x0_inv_all();
 
 		/* enable L2X0 */
@@ -445,33 +455,132 @@ static void __init pl310_of_setup(const struct device_node *np,
 	}
 }
 
+static void __init pl310_save(void)
+{
+	u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
+		L2X0_CACHE_ID_RTL_MASK;
+
+	l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base +
+		L2X0_TAG_LATENCY_CTRL);
+	l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base +
+		L2X0_DATA_LATENCY_CTRL);
+	l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base +
+		L2X0_ADDR_FILTER_END);
+	l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base +
+		L2X0_ADDR_FILTER_START);
+
+	if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
+		/*
+		 * From r2p0, there is Prefetch offset/control register
+		 */
+		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base +
+			L2X0_PREFETCH_CTRL);
+		/*
+		 * From r3p0, there is Power control register
+		 */
+		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
+			l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base +
+				L2X0_POWER_CTRL);
+	}
+}
+
+static void l2x0_resume(void)
+{
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		/* restore aux ctrl and enable l2 */
+		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));
+
+		writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base +
+			L2X0_AUX_CTRL);
+
+		l2x0_inv_all();
+
+		writel_relaxed(1, l2x0_base + L2X0_CTRL);
+	}
+}
+
+static void pl310_resume(void)
+{
+	u32 l2x0_revision;
+
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		/* restore pl310 setup */
+		writel_relaxed(l2x0_saved_regs.tag_latency,
+			l2x0_base + L2X0_TAG_LATENCY_CTRL);
+		writel_relaxed(l2x0_saved_regs.data_latency,
+			l2x0_base + L2X0_DATA_LATENCY_CTRL);
+		writel_relaxed(l2x0_saved_regs.filter_end,
+			l2x0_base + L2X0_ADDR_FILTER_END);
+		writel_relaxed(l2x0_saved_regs.filter_start,
+			l2x0_base + L2X0_ADDR_FILTER_START);
+
+		l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
+			L2X0_CACHE_ID_RTL_MASK;
+
+		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
+			writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+				l2x0_base + L2X0_PREFETCH_CTRL);
+			if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
+				writel_relaxed(l2x0_saved_regs.pwr_ctrl,
+					l2x0_base + L2X0_POWER_CTRL);
+		}
+	}
+
+	l2x0_resume();
+}
+
+static const struct l2x0_of_data pl310_data = {
+	pl310_of_setup,
+	pl310_save,
+	pl310_resume,
+};
+
+static const struct l2x0_of_data l2x0_data = {
+	l2x0_of_setup,
+	NULL,
+	l2x0_resume,
+};
+
 static const struct of_device_id l2x0_ids[] __initconst = {
-	{ .compatible = "arm,pl310-cache", .data = pl310_of_setup },
-	{ .compatible = "arm,l220-cache", .data = l2x0_of_setup },
-	{ .compatible = "arm,l210-cache", .data = l2x0_of_setup },
+	{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
+	{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
+	{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data },
 	{}
 };
 
 int __init l2x0_of_init(__u32 aux_val, __u32 aux_mask)
 {
 	struct device_node *np;
-	void (*l2_setup)(const struct device_node *np,
-		__u32 *aux_val, __u32 *aux_mask);
+	struct l2x0_of_data *data;
+	struct resource res;
 
 	np = of_find_matching_node(NULL, l2x0_ids);
 	if (!np)
 		return -ENODEV;
-	l2x0_base = of_iomap(np, 0);
+
+	if (of_address_to_resource(np, 0, &res))
+		return -ENODEV;
+
+	l2x0_base = ioremap(res.start, resource_size(&res));
 	if (!l2x0_base)
 		return -ENOMEM;
 
+	l2x0_saved_regs.phy_base = res.start;
+
+	data = of_match_node(l2x0_ids, np)->data;
+
 	/* L2 configuration can only be changed if the cache is disabled */
 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
-		l2_setup = of_match_node(l2x0_ids, np)->data;
-		if (l2_setup)
-			l2_setup(np, &aux_val, &aux_mask);
+		if (data->setup)
+			data->setup(np, &aux_val, &aux_mask);
 	}
+
+	if (data->save)
+		data->save();
+
 	l2x0_init(l2x0_base, aux_val, aux_mask);
+
+	outer_cache.resume = data->resume;
 	return 0;
 }
 #endif