From 5185352c163a72cf969b2fbbfb89801b398896fd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 14:48:11 -0700 Subject: [PATCH 01/77] libceph: fix msgpool There were several problems here: 1- we weren't tagging allocations with the pool, so they were never returned to the pool. 2- msgpool_put didn't add back to the mempool, even it were called. 3- msgpool_release didn't clear the pool pointer, so it would have looped had #1 not been broken. These may or may not have been responsible for #1136 or #1381 (BUG due to non-empty mempool on umount). I can't seem to trigger the crash now using the method I was using before. Signed-off-by: Sage Weil --- net/ceph/msgpool.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index d5f2d97ac05c..1f4cb30a42c5 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -7,27 +7,37 @@ #include -static void *alloc_fn(gfp_t gfp_mask, void *arg) +static void *msgpool_alloc(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; - void *p; + struct ceph_msg *msg; - p = ceph_msg_new(0, pool->front_len, gfp_mask); - if (!p) - pr_err("msgpool %s alloc failed\n", pool->name); - return p; + msg = ceph_msg_new(0, pool->front_len, gfp_mask); + if (!msg) { + dout("msgpool_alloc %s failed\n", pool->name); + } else { + dout("msgpool_alloc %s %p\n", pool->name, msg); + msg->pool = pool; + } + return msg; } -static void free_fn(void *element, void *arg) +static void msgpool_free(void *element, void *arg) { - ceph_msg_put(element); + struct ceph_msgpool *pool = arg; + struct ceph_msg *msg = element; + + dout("msgpool_release %s %p\n", pool->name, msg); + msg->pool = NULL; + ceph_msg_put(msg); } int ceph_msgpool_init(struct ceph_msgpool *pool, int front_len, int size, bool blocking, const char *name) { + dout("msgpool %s init\n", name); pool->front_len = front_len; - pool->pool = mempool_create(size, alloc_fn, free_fn, pool); + pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) return -ENOMEM; pool->name = name; @@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool, void ceph_msgpool_destroy(struct ceph_msgpool *pool) { + dout("msgpool %s destroy\n", pool->name); mempool_destroy(pool->pool); } struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { + struct ceph_msg *msg; + if (front_len > pool->front_len) { - pr_err("msgpool_get pool %s need front %d, pool size is %d\n", + dout("msgpool_get %s need front %d, pool size is %d\n", pool->name, front_len, pool->front_len); WARN_ON(1); @@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, return ceph_msg_new(0, front_len, GFP_NOFS); } - return mempool_alloc(pool->pool, GFP_NOFS); + msg = mempool_alloc(pool->pool, GFP_NOFS); + dout("msgpool_get %s %p\n", pool->name, msg); + return msg; } void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { + dout("msgpool_put %s %p\n", pool->name, msg); + /* reset msg front_len; user may have changed it */ msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); kref_init(&msg->kref); /* retake single ref */ + mempool_free(msg, pool->pool); } From 4f6fdf08681cecd9f38499de7a02eb4f05f399a7 Mon Sep 17 00:00:00 2001 From: Chase Douglas Date: Fri, 5 Aug 2011 09:16:57 -0700 Subject: [PATCH 02/77] HID: magicmouse: Set resolution of touch surfaces Add touch surface resolution information. The size of the touch surfaces has been determined to the hundredth of a mm. Cc: Jiri Kosina Cc: Michael Poole Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Chase Douglas [jkosina@suse.cz: update comments and commit message] Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 56 +++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 0ec91c18a421..b5bdab3299bc 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -81,6 +81,28 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie #define NO_TOUCHES -1 #define SINGLE_TOUCH_UP -2 +/* Touch surface information. Dimension is in hundredths of a mm, min and max + * are in units. */ +#define MOUSE_DIMENSION_X (float)9056 +#define MOUSE_MIN_X -1100 +#define MOUSE_MAX_X 1258 +#define MOUSE_RES_X ((MOUSE_MAX_X - MOUSE_MIN_X) / (MOUSE_DIMENSION_X / 100)) +#define MOUSE_DIMENSION_Y (float)5152 +#define MOUSE_MIN_Y -1589 +#define MOUSE_MAX_Y 2047 +#define MOUSE_RES_Y ((MOUSE_MAX_Y - MOUSE_MIN_Y) / (MOUSE_DIMENSION_Y / 100)) + +#define TRACKPAD_DIMENSION_X (float)13000 +#define TRACKPAD_MIN_X -2909 +#define TRACKPAD_MAX_X 3167 +#define TRACKPAD_RES_X \ + ((TRACKPAD_MAX_X - TRACKPAD_MIN_X) / (TRACKPAD_DIMENSION_X / 100)) +#define TRACKPAD_DIMENSION_Y (float)11000 +#define TRACKPAD_MIN_Y -2456 +#define TRACKPAD_MAX_Y 2565 +#define TRACKPAD_RES_Y \ + ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100)) + /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. @@ -406,17 +428,31 @@ static void magicmouse_setup_input(struct input_dev *input, struct hid_device *h * inverse of the reported Y. */ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { - input_set_abs_params(input, ABS_MT_POSITION_X, -1100, - 1358, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, -1589, - 2047, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_X, + MOUSE_MIN_X, MOUSE_MAX_X, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, + MOUSE_MIN_Y, MOUSE_MAX_Y, 4, 0); + + input_abs_set_res(input, ABS_MT_POSITION_X, + MOUSE_RES_X); + input_abs_set_res(input, ABS_MT_POSITION_Y, + MOUSE_RES_Y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ - input_set_abs_params(input, ABS_X, -2909, 3167, 4, 0); - input_set_abs_params(input, ABS_Y, -2456, 2565, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_X, -2909, - 3167, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, -2456, - 2565, 4, 0); + input_set_abs_params(input, ABS_X, TRACKPAD_MIN_X, + TRACKPAD_MAX_X, 4, 0); + input_set_abs_params(input, ABS_Y, TRACKPAD_MIN_Y, + TRACKPAD_MAX_Y, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_X, + TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, + TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0); + + input_abs_set_res(input, ABS_X, TRACKPAD_RES_X); + input_abs_set_res(input, ABS_Y, TRACKPAD_RES_Y); + input_abs_set_res(input, ABS_MT_POSITION_X, + TRACKPAD_RES_X); + input_abs_set_res(input, ABS_MT_POSITION_Y, + TRACKPAD_RES_Y); } input_set_events_per_packet(input, 60); From 971c90bfa2f0b4fe52d6d9002178d547706f1343 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 4 Aug 2011 07:25:35 -0700 Subject: [PATCH 03/77] alarmtimers: Avoid possible null pointer traversal We don't check if old_setting is non null before assigning it, so correct this. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 59f369f98a04..1dee3f62a6a7 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -479,11 +479,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; - /* Save old values */ - old_setting->it_interval = - ktime_to_timespec(timr->it.alarmtimer.period); - old_setting->it_value = - ktime_to_timespec(timr->it.alarmtimer.node.expires); + if (old_setting) + alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ alarm_cancel(&timr->it.alarmtimer); From ea7802f630d356acaf66b3c0b28c00a945fc35dc Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 4 Aug 2011 07:51:56 -0700 Subject: [PATCH 04/77] alarmtimers: Memset itimerspec passed into alarm_timer_get Following common_timer_get, zero out the itimerspec passed in. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 1dee3f62a6a7..0e9263f6fd09 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer) static void alarm_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { + memset(cur_setting, 0, sizeof(struct itimerspec)); + cur_setting->it_interval = ktime_to_timespec(timr->it.alarmtimer.period); cur_setting->it_value = From 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 10:26:09 -0700 Subject: [PATCH 05/77] alarmtimers: Avoid possible denial of service with high freq periodic timers Its possible to jam up the alarm timers by setting very small interval timers, which will cause the alarmtimer subsystem to spend all of its time firing and restarting timers. This can effectivly lock up a box. A deeper fix is needed, closely mimicking the hrtimer code, but for now just cap the interval to 100us to avoid userland hanging the system. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 0e9263f6fd09..ea5e1a928d5b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -481,6 +481,15 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; + /* + * XXX HACK! Currently we can DOS a system if the interval + * period on alarmtimers is too small. Cap the interval here + * to 100us and solve this properly in a future patch! -jstultz + */ + if ((new_setting->it_interval.tv_sec == 0) && + (new_setting->it_interval.tv_nsec < 100000)) + new_setting->it_interval.tv_nsec = 100000; + if (old_setting) alarm_timer_get(timr, old_setting); From bf6ed027bcc93f8d54d321fe87f0434b25699eb1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 10 Aug 2011 21:11:26 +0800 Subject: [PATCH 06/77] rtc: ep93xx: Fix 'rtc' may be used uninitialized warning commit 92d921c5d "rtc: ep93xx: Initialize drvdata before registering device" ensures the drvdata is initialized prior to registering the rtc device. But it set the drvdata to an uninitialized pointer. Thus calling platform_get_drvdata in ep93xx_rtc_remove does not get correct address. This patch fixes below warning by adding struct rtc_device *rtc to struct ep93xx_rtc. Then set platform drvdata to ep93xx_rtc instead of rtc. CC drivers/rtc/rtc-ep93xx.o drivers/rtc/rtc-ep93xx.c: In function 'ep93xx_rtc_probe': drivers/rtc/rtc-ep93xx.c:154: warning: 'rtc' may be used uninitialized in this function Signed-off-by: Axel Lin Signed-off-by: John Stultz --- drivers/rtc/rtc-ep93xx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 335551d333b2..14a42a1edc66 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -36,6 +36,7 @@ */ struct ep93xx_rtc { void __iomem *mmio_base; + struct rtc_device *rtc; }; static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, @@ -130,7 +131,6 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) { struct ep93xx_rtc *ep93xx_rtc; struct resource *res; - struct rtc_device *rtc; int err; ep93xx_rtc = devm_kzalloc(&pdev->dev, sizeof(*ep93xx_rtc), GFP_KERNEL); @@ -151,12 +151,12 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return -ENXIO; pdev->dev.platform_data = ep93xx_rtc; - platform_set_drvdata(pdev, rtc); + platform_set_drvdata(pdev, ep93xx_rtc); - rtc = rtc_device_register(pdev->name, + ep93xx_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - err = PTR_ERR(rtc); + if (IS_ERR(ep93xx_rtc->rtc)) { + err = PTR_ERR(ep93xx_rtc->rtc); goto exit; } @@ -167,7 +167,7 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return 0; fail: - rtc_device_unregister(rtc); + rtc_device_unregister(ep93xx_rtc->rtc); exit: platform_set_drvdata(pdev, NULL); pdev->dev.platform_data = NULL; @@ -176,11 +176,11 @@ exit: static int __exit ep93xx_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); + struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev); sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files); platform_set_drvdata(pdev, NULL); - rtc_device_unregister(rtc); + rtc_device_unregister(ep93xx_rtc->rtc); pdev->dev.platform_data = NULL; return 0; From dec35d19c4ec65b94df3b27b6e373f0d48c9cd32 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 16 Mar 2011 06:07:14 +0000 Subject: [PATCH 07/77] rtc: rtc-twl: Switch to using threaded irq The driver is accessing to i2c bus in interrupt handler. Therefore, it should use threaded irq. Signed-off-by: Ilkka Koskinen Acked-by: Balaji T K Signed-off-by: John Stultz --- drivers/rtc/rtc-twl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 9a81f778d6b2..1963cddbf214 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -462,7 +462,7 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev) if (ret < 0) goto out1; - ret = request_irq(irq, twl_rtc_interrupt, + ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt, IRQF_TRIGGER_RISING, dev_name(&rtc->dev), rtc); if (ret < 0) { From 34d623d11316cb69f9e8cc5eb50d3792b5c302b6 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 31 May 2011 08:51:39 +0000 Subject: [PATCH 08/77] rtc: rtc-twl: Remove lockdep related local_irq_enable() Now that the irq is properly threaded (due to it needing i2c access) we should also remove the local_irq_enable() call in twl_rtc_interrupt. Testing this with Pandaboard, the RTC is still working. [Reworked commit message -jstultz] Signed-off-by: John Stultz --- drivers/rtc/rtc-twl.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 1963cddbf214..9677bbc433f9 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -362,14 +362,6 @@ static irqreturn_t twl_rtc_interrupt(int irq, void *rtc) int res; u8 rd_reg; -#ifdef CONFIG_LOCKDEP - /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which - * we don't want and can't tolerate. Although it might be - * friendlier not to borrow this thread context... - */ - local_irq_enable(); -#endif - res = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG); if (res) goto out; From 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 22 Jul 2011 09:12:51 +0000 Subject: [PATCH 09/77] rtc: Fix RTC PIE frequency limit Thomas earlier submitted a fix to limit the RTC PIE freq, but picked 5000Hz out of the air. Willy noticed that we should instead use the 8192Hz max from the rtc man documentation. Cc: Willy Tarreau Cc: stable@kernel.org Cc: Thomas Gleixner Signed-off-by: John Stultz --- drivers/rtc/interface.c | 2 +- include/linux/rtc.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 3195dbd3ec34..eb4c88316a15 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) int err = 0; unsigned long flags; - if (freq <= 0 || freq > 5000) + if (freq <= 0 || freq > RTC_MAX_FREQ) return -EINVAL; retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b27ebea25660..93f4d035076b 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -97,6 +97,9 @@ struct rtc_pll_info { #define RTC_AF 0x20 /* Alarm interrupt */ #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ + +#define RTC_MAX_FREQ 8192 + #ifdef __KERNEL__ #include From 795858dbd253462a67e14272edeaae73c6074b17 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Aug 2011 13:02:37 -0700 Subject: [PATCH 10/77] ceph: fix encoding of ino only (not relative) paths A 'path' consists of a starting ino and relative component. Encode even when there is no relative component. This is primarily needed by the NFS reexport code. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fee028b5332e..86c59e16ba74 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1595,7 +1595,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); - } else if (rpath) { + } else if (rpath || rino) { *ino = rino; *ppath = rpath; *pathlen = strlen(rpath); From 016f1c54408b1e92e2e8087bfc05ca0a9c258513 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 11 Aug 2011 12:29:46 +0200 Subject: [PATCH 11/77] UBIFS: not build debug messages with CONFIG_UBIFS_FS_DEBUG disabled With $ grep -e UBIFS_FS_DEBUG -e DYNAMIC_DEBUG .config # CONFIG_UBIFS_FS_DEBUG is not set CONFIG_DYNAMIC_DEBUG=y Debug messages are kept in the object files due to the dynamic_pr_debug() macro, even if they are never going to be printed: $ make fs/ubifs/super.o $ strings fs/ubifs/super.o | grep 'compiled on' compiled on: Aug 11 2011 at 12:21:38 Use plain printk to fix this. Signed-off-by: Michal Marek Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 45174b534377..feb361e252ac 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -335,9 +335,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define DBGKEY(key) ((char *)(key)) #define DBGKEY1(key) ((char *)(key)) -#define ubifs_dbg_msg(fmt, ...) do { \ - if (0) \ - pr_debug(fmt "\n", ##__VA_ARGS__); \ +#define ubifs_dbg_msg(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_dump_stack() From 9efabc84768ee8e79b50ad6ad6cff94d66da01f7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 19 Aug 2011 19:02:27 +0300 Subject: [PATCH 12/77] UBI: do not link debug messages when debugging is disabled Michal Marek spotted the same issue in UBIFS and this patch fixes UBI, see "UBIFS: not build debug messages with CONFIG_UBIFS_FS_DEBUG disabled" When UBI debugging is disabled, we have debugging messages defined as: if (0) pr_debug() But pr_debug macro defines data structures with debugging data and makes the linux binary larger, even though we have "if (0)". Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 65b5b76cc379..64fbb0021825 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -181,7 +181,7 @@ static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) #define ubi_dbg_msg(fmt, ...) do { \ if (0) \ - pr_debug(fmt "\n", ##__VA_ARGS__); \ + printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_msg(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) From 259a187ade45056fd44856654f78aa9e9f0f7c75 Mon Sep 17 00:00:00 2001 From: Noah Watkins Date: Mon, 22 Aug 2011 13:49:41 -0600 Subject: [PATCH 13/77] ceph: fix memory leak kfree does not clean up indirect allocations in ceph_fs_client and ceph_options (e.g. snapdir_name). Signed-off-by: Noah Watkins Signed-off-by: Sage Weil --- fs/ceph/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index d47c5ec7fb1f..88bacaf385d9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -813,8 +813,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, fsc = create_fs_client(fsopt, opt); if (IS_ERR(fsc)) { res = ERR_CAST(fsc); - kfree(fsopt); - kfree(opt); + destroy_mount_options(fsopt); + ceph_destroy_options(opt); goto out_final; } From 35d851df23b093ee027f827fed2213ae5e88fc7a Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 25 Aug 2011 14:21:37 +0200 Subject: [PATCH 14/77] HID: magicmouse: ignore 'ivalid report id' while switching modes, v2 This is basically a more generic respin of 23746a6 ("HID: magicmouse: ignore 'ivalid report id' while switching modes") which got reverted later by c3a492. It turns out that on some configurations, this is actually still the case and we are not able to detect in runtime. The device reponds with 'invalid report id' when feature report switching it into multitouch mode is sent to it. This has been silently ignored before 0825411ade ("HID: bt: Wait for ACK on Sent Reports"), but since this commit, it propagates -EIO from the _raw callback . So let the driver ignore -EIO as response to 0xd7,0x01 report, as that's how the device reacts in normal mode. Sad, but following reality. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=35022 Reported-by: Chase Douglas Reported-by: Jaikumar Ganesh Tested-by: Chase Douglas Tested-by: Jaikumar Ganesh Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index b5bdab3299bc..f0fbd7bd239e 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -537,9 +537,17 @@ static int magicmouse_probe(struct hid_device *hdev, } report->size = 6; + /* + * Some devices repond with 'invalid report id' when feature + * report switching it into multitouch mode is sent to it. + * + * This results in -EIO from the _raw low-level transport callback, + * but there seems to be no other way of switching the mode. + * Thus the super-ugly hacky success check below. + */ ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature), HID_FEATURE_REPORT); - if (ret != sizeof(feature)) { + if (ret != -EIO && ret != sizeof(feature)) { hid_err(hdev, "unable to request touch data (%d)\n", ret); goto err_stop_hw; } From 6d1db0777981e1626ae71243984ac300b61789ff Mon Sep 17 00:00:00 2001 From: Clemens Werther Date: Thu, 25 Aug 2011 15:35:14 +0200 Subject: [PATCH 15/77] HID: add support for HuiJia USB Gamepad connector Create each gamepad as a separate joystick Signed-off-by: Clemens Werther Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7d27d2b0445a..7484e1b67249 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -277,6 +277,7 @@ #define USB_DEVICE_ID_PENPOWER 0x00f4 #define USB_VENDOR_ID_GREENASIA 0x0e8f +#define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD 0x3013 #define USB_VENDOR_ID_GRETAGMACBETH 0x0971 #define USB_DEVICE_ID_GRETAGMACBETH_HUEY 0x2005 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 4bdb5d46c52c..3146fdcda272 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -47,6 +47,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016, HID_QUIRK_FULLSPEED_INTERVAL }, { USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK, HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS, HID_QUIRK_MULTI_INPUT }, From a7402deb324f62106566f5a95199a54c41e200ef Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Fri, 12 Aug 2011 21:04:30 +0000 Subject: [PATCH 16/77] rtc: Initialized rtc_time->tm_isdst Even though the Linux kernel does not use the tm_isdst field, it is exposed as part of the ABI. This field can accidentally be left initialized, which is why we currently memset buffers returned to userland in rtc_read_time. There is a case however where the field can return garbage from the stack though when using the RTC_ALM_READ ioctl on the rtc device. This ioctl invokes rtc_read_alarm, which is careful to memset the rtc_wkalrm buffer that is copied to userland, but it then uses a struct copy to assign to alarm->time given the return value from rtc_ktime_to_tm(). rtc_ktime_to_tm() is implemented by calling rtc_time_to_tm using a derivative seconds counds from ktime, but rtc_time_to_tm does not assign a value to ->tm_isdst. This results in garbage from rtc_ktime_to_tm()'s frame ending up being copied out to userland as part of the returned rtc_wkalrm. Fix this by initializing rtc_time->tm_isdst to 0 in rtc_time_to_tm. Signed-off-by: Mike Waychison Signed-off-by: John Stultz --- drivers/rtc/rtc-lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c index 075f1708deae..c4cf05731118 100644 --- a/drivers/rtc/rtc-lib.c +++ b/drivers/rtc/rtc-lib.c @@ -85,6 +85,8 @@ void rtc_time_to_tm(unsigned long time, struct rtc_time *tm) time -= tm->tm_hour * 3600; tm->tm_min = time / 60; tm->tm_sec = time - tm->tm_min * 60; + + tm->tm_isdst = 0; } EXPORT_SYMBOL(rtc_time_to_tm); From 7e72c686347562b4a275c97b4bdd7a79c1f23c65 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 10 Aug 2011 20:20:36 -0700 Subject: [PATCH 17/77] rtc: twl: Fix registration vs. init order Only register as an RTC device after the hardware has been successfully initialized. The RTC class driver will call back to this driver to read a pending alarm, and other drivers watching for new devices on the RTC class may read the RTC time upon registration. Such access might occur while the RTC is stopped, prior to clearing pending alarms, etc. The new ordering also avoids leaving the platform device drvdata set to an unregistered struct rtc_device * on probe errors. Signed-off-by: Todd Poynor Signed-off-by: John Stultz --- drivers/rtc/rtc-twl.c | 54 ++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 9677bbc433f9..20687d55e7a7 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -420,24 +420,12 @@ static struct rtc_class_ops twl_rtc_ops = { static int __devinit twl_rtc_probe(struct platform_device *pdev) { struct rtc_device *rtc; - int ret = 0; + int ret = -EINVAL; int irq = platform_get_irq(pdev, 0); u8 rd_reg; if (irq <= 0) - return -EINVAL; - - rtc = rtc_device_register(pdev->name, - &pdev->dev, &twl_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - ret = PTR_ERR(rtc); - dev_err(&pdev->dev, "can't register RTC device, err %ld\n", - PTR_ERR(rtc)); - goto out0; - - } - - platform_set_drvdata(pdev, rtc); + goto out1; ret = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG); if (ret < 0) @@ -454,14 +442,6 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev) if (ret < 0) goto out1; - ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt, - IRQF_TRIGGER_RISING, - dev_name(&rtc->dev), rtc); - if (ret < 0) { - dev_err(&pdev->dev, "IRQ is not free.\n"); - goto out1; - } - if (twl_class_is_6030()) { twl6030_interrupt_unmask(TWL6030_RTC_INT_MASK, REG_INT_MSK_LINE_A); @@ -472,28 +452,44 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev) /* Check RTC module status, Enable if it is off */ ret = twl_rtc_read_u8(&rd_reg, REG_RTC_CTRL_REG); if (ret < 0) - goto out2; + goto out1; if (!(rd_reg & BIT_RTC_CTRL_REG_STOP_RTC_M)) { dev_info(&pdev->dev, "Enabling TWL-RTC.\n"); rd_reg = BIT_RTC_CTRL_REG_STOP_RTC_M; ret = twl_rtc_write_u8(rd_reg, REG_RTC_CTRL_REG); if (ret < 0) - goto out2; + goto out1; } /* init cached IRQ enable bits */ ret = twl_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG); if (ret < 0) - goto out2; + goto out1; - return ret; + rtc = rtc_device_register(pdev->name, + &pdev->dev, &twl_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) { + ret = PTR_ERR(rtc); + dev_err(&pdev->dev, "can't register RTC device, err %ld\n", + PTR_ERR(rtc)); + goto out1; + } + + ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt, + IRQF_TRIGGER_RISING, + dev_name(&rtc->dev), rtc); + if (ret < 0) { + dev_err(&pdev->dev, "IRQ is not free.\n"); + goto out2; + } + + platform_set_drvdata(pdev, rtc); + return 0; out2: - free_irq(irq, rtc); -out1: rtc_device_unregister(rtc); -out0: +out1: return ret; } From 66506f761772c87fd4ff31b94b298888d5d58d77 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 15 Aug 2011 10:28:18 +0800 Subject: [PATCH 18/77] mmc: sdhci-esdhc-imx: add missing inclusion of linux/module.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are the following warnings and errorx when compiling the driver. The patch adds the missing inclusion of linux/module.h to fix them. drivers/mmc/host/sdhci-esdhc-imx.c:563:12: error: ‘THIS_MODULE’ undeclared here (not in a function) [..] Signed-off-by: Shawn Guo Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-esdhc-imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 0e9780f5a4a9..4dc0028086a3 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include From c259e01a1ec90063042f758e409cd26b2a0963c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 22 Jun 2011 19:47:00 +0200 Subject: [PATCH 19/77] sched: Separate the scheduler entry for preemption Block-IO and workqueues call into notifier functions from the scheduler core code with interrupts and preemption disabled. These calls should be made before entering the scheduler core. To simplify this, separate the scheduler core code into __schedule(). __schedule() is directly called from the places which set PREEMPT_ACTIVE and from schedule(). This allows us to add the work checks into schedule(), so they are only called when a task voluntary goes to sleep. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Cc: Tejun Heo Cc: Jens Axboe Cc: Linus Torvalds Cc: stable@kernel.org # 2.6.39+ Link: http://lkml.kernel.org/r/20110622174918.813258321@linutronix.de Signed-off-by: Ingo Molnar --- kernel/sched.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index ccacdbdecf45..ec15e8129cf7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4279,9 +4279,9 @@ pick_next_task(struct rq *rq) } /* - * schedule() is the main scheduler function. + * __schedule() is the main scheduler function. */ -asmlinkage void __sched schedule(void) +static void __sched __schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; @@ -4369,6 +4369,11 @@ need_resched: if (need_resched()) goto need_resched; } + +asmlinkage void schedule(void) +{ + __schedule(); +} EXPORT_SYMBOL(schedule); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER @@ -4435,7 +4440,7 @@ asmlinkage void __sched notrace preempt_schedule(void) do { add_preempt_count_notrace(PREEMPT_ACTIVE); - schedule(); + __schedule(); sub_preempt_count_notrace(PREEMPT_ACTIVE); /* @@ -4463,7 +4468,7 @@ asmlinkage void __sched preempt_schedule_irq(void) do { add_preempt_count(PREEMPT_ACTIVE); local_irq_enable(); - schedule(); + __schedule(); local_irq_disable(); sub_preempt_count(PREEMPT_ACTIVE); @@ -5588,7 +5593,7 @@ static inline int should_resched(void) static void __cond_resched(void) { add_preempt_count(PREEMPT_ACTIVE); - schedule(); + __schedule(); sub_preempt_count(PREEMPT_ACTIVE); } From 9c40cef2b799f9b5e7fa5de4d2ad3a0168ba118c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 22 Jun 2011 19:47:01 +0200 Subject: [PATCH 20/77] sched: Move blk_schedule_flush_plug() out of __schedule() There is no real reason to run blk_schedule_flush_plug() with interrupts and preemption disabled. Move it into schedule() and call it when the task is going voluntarily to sleep. There might be false positives when the task is woken between that call and actually scheduling, but that's not really different from being woken immediately after switching away. This fixes a deadlock in the scheduler where the blk_schedule_flush_plug() callchain enables interrupts and thereby allows a wakeup to happen of the task that's going to sleep. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Cc: Tejun Heo Cc: Jens Axboe Cc: Linus Torvalds Cc: stable@kernel.org # 2.6.39+ Link: http://lkml.kernel.org/n/tip-dwfxtra7yg1b5r65m32ywtct@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index ec15e8129cf7..511732c39b6e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4322,16 +4322,6 @@ need_resched: if (to_wakeup) try_to_wake_up_local(to_wakeup); } - - /* - * If we are going to sleep and we have plugged IO - * queued, make sure to submit it to avoid deadlocks. - */ - if (blk_needs_flush_plug(prev)) { - raw_spin_unlock(&rq->lock); - blk_schedule_flush_plug(prev); - raw_spin_lock(&rq->lock); - } } switch_count = &prev->nvcsw; } @@ -4370,8 +4360,23 @@ need_resched: goto need_resched; } +static inline void sched_submit_work(struct task_struct *tsk) +{ + if (!tsk->state) + return; + /* + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. + */ + if (blk_needs_flush_plug(tsk)) + blk_schedule_flush_plug(tsk); +} + asmlinkage void schedule(void) { + struct task_struct *tsk = current; + + sched_submit_work(tsk); __schedule(); } EXPORT_SYMBOL(schedule); From feff8fa0075bdfd43c841e9d689ed81adda988d6 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 18 Aug 2011 20:36:57 +0800 Subject: [PATCH 21/77] sched: Fix a memory leak in __sdt_free() This patch fixes the following memory leak: unreferenced object 0xffff880107266800 (size 512): comm "sched-powersave", pid 3718, jiffies 4323097853 (age 27495.450s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] create_object+0x187/0x28b [] kmemleak_alloc+0x73/0x98 [] __kmalloc_node+0x104/0x159 [] kzalloc_node.clone.97+0x15/0x17 [] build_sched_domains+0xb7/0x7f3 [] partition_sched_domains+0x1db/0x24a [] do_rebuild_sched_domains+0x3b/0x47 [] rebuild_sched_domains+0x10/0x12 [] sched_power_savings_store+0x6c/0x7b [] sched_mc_power_savings_store+0x16/0x18 [] sysdev_class_store+0x20/0x22 [] sysfs_write_file+0x108/0x144 [] vfs_write+0xaf/0x102 [] sys_write+0x4d/0x74 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff Signed-off-by: WANG Cong Signed-off-by: Peter Zijlstra Cc: stable@kernel.org # 3.0 Link: http://lkml.kernel.org/r/1313671017-4112-1-git-send-email-amwang@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched.c b/kernel/sched.c index 511732c39b6e..c79e7c63a4aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7453,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map) struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); if (sd && (sd->flags & SD_OVERLAP)) free_sched_groups(sd->groups, 0); + kfree(*per_cpu_ptr(sdd->sd, j)); kfree(*per_cpu_ptr(sdd->sg, j)); kfree(*per_cpu_ptr(sdd->sgp, j)); } From a8d757ef076f0f95f13a918808824058de25b3eb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 25 Aug 2011 15:58:03 +0200 Subject: [PATCH 22/77] perf events: Fix slow and broken cgroup context switch code The current cgroup context switch code was incorrect leading to bogus counts. Furthermore, as soon as there was an active cgroup event on a CPU, the context switch cost on that CPU would increase by a significant amount as demonstrated by a simple ping/pong example: $ ./pong Both processes pinned to CPU1, running for 10s 10684.51 ctxsw/s Now start a cgroup perf stat: $ perf stat -e cycles,cycles -A -a -G test -C 1 -- sleep 100 $ ./pong Both processes pinned to CPU1, running for 10s 6674.61 ctxsw/s That's a 37% penalty. Note that pong is not even in the monitored cgroup. The results shown by perf stat are bogus: $ perf stat -e cycles,cycles -A -a -G test -C 1 -- sleep 100 Performance counter stats for 'sleep 100': CPU1 cycles test CPU1 16,984,189,138 cycles # 0.000 GHz The second 'cycles' event should report a count @ CPU clock (here 2.4GHz) as it is counting across all cgroups. The patch below fixes the bogus accounting and bypasses any cgroup switches in case the outgoing and incoming tasks are in the same cgroup. With this patch the same test now yields: $ ./pong Both processes pinned to CPU1, running for 10s 10775.30 ctxsw/s Start perf stat with cgroup: $ perf stat -e cycles,cycles -A -a -G test -C 1 -- sleep 10 Run pong outside the cgroup: $ /pong Both processes pinned to CPU1, running for 10s 10687.80 ctxsw/s The penalty is now less than 2%. And the results for perf stat are correct: $ perf stat -e cycles,cycles -A -a -G test -C 1 -- sleep 10 Performance counter stats for 'sleep 10': CPU1 cycles test # 0.000 GHz CPU1 23,933,981,448 cycles # 0.000 GHz Now perf stat reports the correct counts for for the non cgroup event. If we run pong inside the cgroup, then we also get the correct counts: $ perf stat -e cycles,cycles -A -a -G test -C 1 -- sleep 10 Performance counter stats for 'sleep 10': CPU1 22,297,726,205 cycles test # 0.000 GHz CPU1 23,933,981,448 cycles # 0.000 GHz 10.001457237 seconds time elapsed Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110825135803.GA4697@quad Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 24 +++++++++------ kernel/events/core.c | 63 ++++++++++++++++++++++++++++++++------ kernel/sched.c | 2 +- 3 files changed, 69 insertions(+), 20 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 245bafdafd5e..c816075c01ce 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -944,8 +944,10 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); extern const char *perf_pmu_name(void); -extern void __perf_event_task_sched_in(struct task_struct *task); -extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); +extern void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task); +extern void __perf_event_task_sched_out(struct task_struct *prev, + struct task_struct *next); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1059,17 +1061,20 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) extern struct jump_label_key perf_sched_events; -static inline void perf_event_task_sched_in(struct task_struct *task) +static inline void perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { if (static_branch(&perf_sched_events)) - __perf_event_task_sched_in(task); + __perf_event_task_sched_in(prev, task); } -static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +static inline void perf_event_task_sched_out(struct task_struct *prev, + struct task_struct *next) { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); - __perf_event_task_sched_out(task, next); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_out(prev, next); } extern void perf_event_mmap(struct vm_area_struct *vma); @@ -1139,10 +1144,11 @@ extern void perf_event_disable(struct perf_event *event); extern void perf_event_task_tick(void); #else static inline void -perf_event_task_sched_in(struct task_struct *task) { } +perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { } static inline void -perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) { } +perf_event_task_sched_out(struct task_struct *prev, + struct task_struct *next) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } diff --git a/kernel/events/core.c b/kernel/events/core.c index b8785e26ee1c..45847fbb599a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_restore(flags); } -static inline void perf_cgroup_sched_out(struct task_struct *task) +static inline void perf_cgroup_sched_out(struct task_struct *task, + struct task_struct *next) { - perf_cgroup_switch(task, PERF_CGROUP_SWOUT); + struct perf_cgroup *cgrp1; + struct perf_cgroup *cgrp2 = NULL; + + /* + * we come here when we know perf_cgroup_events > 0 + */ + cgrp1 = perf_cgroup_from_task(task); + + /* + * next is NULL when called from perf_event_enable_on_exec() + * that will systematically cause a cgroup_switch() + */ + if (next) + cgrp2 = perf_cgroup_from_task(next); + + /* + * only schedule out current cgroup events if we know + * that we are switching to a different cgroup. Otherwise, + * do no touch the cgroup events. + */ + if (cgrp1 != cgrp2) + perf_cgroup_switch(task, PERF_CGROUP_SWOUT); } -static inline void perf_cgroup_sched_in(struct task_struct *task) +static inline void perf_cgroup_sched_in(struct task_struct *prev, + struct task_struct *task) { - perf_cgroup_switch(task, PERF_CGROUP_SWIN); + struct perf_cgroup *cgrp1; + struct perf_cgroup *cgrp2 = NULL; + + /* + * we come here when we know perf_cgroup_events > 0 + */ + cgrp1 = perf_cgroup_from_task(task); + + /* prev can never be NULL */ + cgrp2 = perf_cgroup_from_task(prev); + + /* + * only need to schedule in cgroup events if we are changing + * cgroup during ctxsw. Cgroup events were not scheduled + * out of ctxsw out if that was not the case. + */ + if (cgrp1 != cgrp2) + perf_cgroup_switch(task, PERF_CGROUP_SWIN); } static inline int perf_cgroup_connect(int fd, struct perf_event *event, @@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { } -static inline void perf_cgroup_sched_out(struct task_struct *task) +static inline void perf_cgroup_sched_out(struct task_struct *task, + struct task_struct *next) { } -static inline void perf_cgroup_sched_in(struct task_struct *task) +static inline void perf_cgroup_sched_in(struct task_struct *prev, + struct task_struct *task) { } @@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * cgroup event are system-wide mode only */ if (atomic_read(&__get_cpu_var(perf_cgroup_events))) - perf_cgroup_sched_out(task); + perf_cgroup_sched_out(task, next); } static void task_ctx_sched_out(struct perf_event_context *ctx) @@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, * accessing the event control register. If a NMI hits, then it will * keep the event running. */ -void __perf_event_task_sched_in(struct task_struct *task) +void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { struct perf_event_context *ctx; int ctxn; @@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task) * cgroup event are system-wide mode only */ if (atomic_read(&__get_cpu_var(perf_cgroup_events))) - perf_cgroup_sched_in(task); + perf_cgroup_sched_in(prev, task); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) * ctxswin cgroup events which are already scheduled * in. */ - perf_cgroup_sched_out(current); + perf_cgroup_sched_out(current, NULL); raw_spin_lock(&ctx->lock); task_ctx_sched_out(ctx); diff --git a/kernel/sched.c b/kernel/sched.c index ccacdbdecf45..0408cdc6d572 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_disable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ - perf_event_task_sched_in(current); + perf_event_task_sched_in(prev, current); #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_enable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ From 3b217116edaac634bf31e85c35708298059a8171 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Tue, 30 Aug 2011 10:58:22 +0200 Subject: [PATCH 23/77] KVM: Fix instruction size issue in pvclock scaling Commit de2d1a524e94 ("KVM: Fix register corruption in pvclock_scale_delta") introduced a mul instruction that may have only a memory operand; the assembler therefore cannot select the correct size: pvclock.s:229: Error: no instruction mnemonic suffix given and no register operands; can't size instruction In this example the assembler is: #APP mul -48(%rbp) ; shrd $32, %rdx, %rax #NO_APP A simple solution is to use mulq. Signed-off-by: Duncan Sands Signed-off-by: Avi Kivity --- arch/x86/include/asm/pvclock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index a518c0a45044..c59cc97fe6c1 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -44,7 +44,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); #elif defined(__x86_64__) __asm__ ( - "mul %[mul_frac] ; shrd $32, %[hi], %[lo]" + "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" : [lo]"=a"(product), [hi]"=d"(tmp) : "0"(delta), From 7f310a5d4e8525ac0cc2f58c973d2100ce034410 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 23 Jun 2011 16:34:38 -0400 Subject: [PATCH 24/77] perf_event: Fix broken calc_timer_values() We detected a serious issue with PERF_SAMPLE_READ and timing information when events were being multiplexing. Samples would have time_running > time_enabled. That was easy to reproduce with a libpfm4 example (ran 3 times to cause multiplexing on Core 2): $ syst_smpl -e uops_retired:freq=1 & $ syst_smpl -e uops_retired:freq=1 & $ syst_smpl -e uops_retired:freq=1 & IIP:0x0000000040062d ... PERIOD:2355332948 ENA=40144625315 RUN=60014875184 syst_smpl: WARNING: time_running > time_enabled 63277537998 uops_retired:freq=1 , scaled The bug was not present in kernel up to (and including) 3.0. It turns out the bug was introduced by the following commit: commit c4794295917ebeda8013b6cb9c8d71ab4f74a1fa events: Move lockless timer calculation into helper function The parameters of the function got reversed yet the call sites were not updated to reflect the change. That lead to time_running and time_enabled being swapped. That had no effect when there was no multiplexing because in that case time_running = time_enabled but it would show up in any other scenario. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110829124112.GA4828@quad Signed-off-by: Ingo Molnar --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 45847fbb599a..0f857782d06f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3396,8 +3396,8 @@ static int perf_event_index(struct perf_event *event) } static void calc_timer_values(struct perf_event *event, - u64 *running, - u64 *enabled) + u64 *enabled, + u64 *running) { u64 now, ctx_time; From 20afc60f892d285fde179ead4b24e6a7938c2f1b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 30 Aug 2011 12:32:36 +0400 Subject: [PATCH 25/77] x86, perf: Check that current->mm is alive before getting user callchain An event may occur when an mm is already released. I added an event in dequeue_entity() and caught a panic with the following backtrace: [ 434.421110] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [ 434.421258] IP: [] __get_user_pages_fast+0x9c/0x120 ... [ 434.421258] Call Trace: [ 434.421258] [] copy_from_user_nmi+0x51/0xf0 [ 434.421258] [] ? sched_clock_local+0x25/0x90 [ 434.421258] [] perf_callchain_user+0x128/0x170 [ 434.421258] [] ? __perf_event_header__init_id+0xed/0x100 [ 434.421258] [] perf_prepare_sample+0x200/0x280 [ 434.421258] [] __perf_event_overflow+0x1b8/0x290 [ 434.421258] [] ? tg_shares_up+0x0/0x670 [ 434.421258] [] ? walk_tg_tree+0x6a/0xb0 [ 434.421258] [] perf_swevent_overflow+0xc4/0xf0 [ 434.421258] [] do_perf_sw_event+0x1e0/0x250 [ 434.421258] [] perf_tp_event+0x44/0x70 [ 434.421258] [] ftrace_profile_sched_block+0xdf/0x110 [ 434.421258] [] dequeue_entity+0x2ad/0x2d0 [ 434.421258] [] dequeue_task_fair+0x1c/0x60 [ 434.421258] [] dequeue_task+0x9a/0xb0 [ 434.421258] [] deactivate_task+0x42/0xe0 [ 434.421258] [] thread_return+0x191/0x808 [ 434.421258] [] ? switch_task_namespaces+0x24/0x60 [ 434.421258] [] do_exit+0x464/0x910 [ 434.421258] [] do_group_exit+0x58/0xd0 [ 434.421258] [] sys_exit_group+0x17/0x20 [ 434.421258] [] system_call_fastpath+0x16/0x1b Signed-off-by: Andrey Vagin Signed-off-by: Peter Zijlstra Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1314693156-24131-1-git-send-email-avagin@openvz.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4ee3abf20ed6..cfa62ec090ec 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1900,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) perf_callchain_store(entry, regs->ip); + if (!current->mm) + return; + if (perf_callchain_user32(regs, entry)) return; From 08c14071fda4e69abb9d5b1566651cd092b158d3 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 18 Aug 2011 15:23:47 +0300 Subject: [PATCH 26/77] mmc: rename mmc_host_clk_{ungate|gate} to mmc_host_clk_{hold|release} As per suggestion by Linus Walleij: > If you think the names of the functions are confusing then > you may rename them, say like this: > > mmc_host_clk_ungate() -> mmc_host_clk_hold() > mmc_host_clk_gate() -> mmc_host_clk_release() > > Which would make the usecases more clear (This is CC'd to stable@ because the next two patches, which fix observable races, depend on it.) Signed-off-by: Mika Westerberg Reviewed-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 4 ++-- drivers/mmc/core/host.c | 10 +++++----- drivers/mmc/core/host.h | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 91a0a7460ebb..63ffc65f84af 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -133,7 +133,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) if (mrq->done) mrq->done(mrq); - mmc_host_clk_gate(host); + mmc_host_clk_release(host); } } @@ -192,7 +192,7 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) mrq->stop->mrq = mrq; } } - mmc_host_clk_ungate(host); + mmc_host_clk_hold(host); led_trigger_event(host->led, LED_FULL); host->ops->request(host, mrq); } diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index b29d3e8fd3a2..96a26b2bf5f0 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -119,14 +119,14 @@ static void mmc_host_clk_gate_work(struct work_struct *work) } /** - * mmc_host_clk_ungate - ungate hardware MCI clocks + * mmc_host_clk_hold - ungate hardware MCI clocks * @host: host to ungate. * * Makes sure the host ios.clock is restored to a non-zero value * past this call. Increase clock reference count and ungate clock * if we're the first user. */ -void mmc_host_clk_ungate(struct mmc_host *host) +void mmc_host_clk_hold(struct mmc_host *host) { unsigned long flags; @@ -164,14 +164,14 @@ static bool mmc_host_may_gate_card(struct mmc_card *card) } /** - * mmc_host_clk_gate - gate off hardware MCI clocks + * mmc_host_clk_release - gate off hardware MCI clocks * @host: host to gate. * * Calls the host driver with ios.clock set to zero as often as possible * in order to gate off hardware MCI clocks. Decrease clock reference * count and schedule disabling of clock. */ -void mmc_host_clk_gate(struct mmc_host *host) +void mmc_host_clk_release(struct mmc_host *host) { unsigned long flags; @@ -231,7 +231,7 @@ static inline void mmc_host_clk_exit(struct mmc_host *host) if (cancel_work_sync(&host->clk_gate_work)) mmc_host_clk_gate_delayed(host); if (host->clk_gated) - mmc_host_clk_ungate(host); + mmc_host_clk_hold(host); /* There should be only one user now */ WARN_ON(host->clk_requests > 1); } diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index de199f911928..fb8a5cd2e4a1 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -16,16 +16,16 @@ int mmc_register_host_class(void); void mmc_unregister_host_class(void); #ifdef CONFIG_MMC_CLKGATE -void mmc_host_clk_ungate(struct mmc_host *host); -void mmc_host_clk_gate(struct mmc_host *host); +void mmc_host_clk_hold(struct mmc_host *host); +void mmc_host_clk_release(struct mmc_host *host); unsigned int mmc_host_clk_rate(struct mmc_host *host); #else -static inline void mmc_host_clk_ungate(struct mmc_host *host) +static inline void mmc_host_clk_hold(struct mmc_host *host) { } -static inline void mmc_host_clk_gate(struct mmc_host *host) +static inline void mmc_host_clk_release(struct mmc_host *host) { } From 778e277cb82411c9002ca28ccbd216c4d9eb9158 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 18 Aug 2011 15:23:48 +0300 Subject: [PATCH 27/77] mmc: core: prevent aggressive clock gating racing with ios updates We have seen at least two different races when clock gating kicks in in a middle of ios structure update. First one happens when ios->clock is changed outside of aggressive clock gating framework, for example via mmc_set_clock(). The race might happen when we run following code: mmc_set_ios(): ... if (ios->clock > 0) mmc_set_ungated(host); Now if gating kicks in right after the condition check we end up setting host->clk_gated to false even though we have just gated the clock. Next time a request is started we try to ungate and restore the clock in mmc_host_clk_hold(). However since we have host->clk_gated set to false the original clock is not restored. This eventually will cause the host controller to hang since its clock is disabled while we are trying to issue a request. For example on Intel Medfield platform we see: [ 13.818610] mmc2: Timeout waiting for hardware interrupt. [ 13.818698] sdhci: =========== REGISTER DUMP (mmc2)=========== [ 13.818753] sdhci: Sys addr: 0x00000000 | Version: 0x00008901 [ 13.818804] sdhci: Blk size: 0x00000000 | Blk cnt: 0x00000000 [ 13.818853] sdhci: Argument: 0x00000000 | Trn mode: 0x00000000 [ 13.818903] sdhci: Present: 0x1fff0000 | Host ctl: 0x00000001 [ 13.818951] sdhci: Power: 0x0000000d | Blk gap: 0x00000000 [ 13.819000] sdhci: Wake-up: 0x00000000 | Clock: 0x00000000 [ 13.819049] sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 [ 13.819098] sdhci: Int enab: 0x00ff00c3 | Sig enab: 0x00ff00c3 [ 13.819147] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.819196] sdhci: Caps: 0x6bee32b2 | Caps_1: 0x00000000 [ 13.819245] sdhci: Cmd: 0x00000000 | Max curr: 0x00000000 [ 13.819292] sdhci: Host ctl2: 0x00000000 [ 13.819331] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x00000000 [ 13.819377] sdhci: =========================================== [ 13.919605] mmc2: Reset 0x2 never completed. and it never recovers. Second race might happen while running mmc_power_off(): static void mmc_power_off(struct mmc_host *host) { host->ios.clock = 0; host->ios.vdd = 0; [ clock gating kicks in here ] /* * Reset ocr mask to be the highest possible voltage supported for * this mmc host. This value will be used at next power up. */ host->ocr = 1 << (fls(host->ocr_avail) - 1); if (!mmc_host_is_spi(host)) { host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; host->ios.chip_select = MMC_CS_DONTCARE; } host->ios.power_mode = MMC_POWER_OFF; host->ios.bus_width = MMC_BUS_WIDTH_1; host->ios.timing = MMC_TIMING_LEGACY; mmc_set_ios(host); } If the clock gating worker kicks in while we are only partially updated the ios structure the host controller gets incomplete ios and might not work as supposed. Again on Intel Medfield platform we get: [ 4.185349] kernel BUG at drivers/mmc/host/sdhci.c:1155! [ 4.185422] invalid opcode: 0000 [#1] PREEMPT SMP [ 4.185509] Modules linked in: [ 4.185565] [ 4.185608] Pid: 4, comm: kworker/0:0 Not tainted 3.0.0+ #240 Intel Corporation Medfield/iCDKA [ 4.185742] EIP: 0060:[] EFLAGS: 00010083 CPU: 0 [ 4.185827] EIP is at sdhci_set_power+0x3e/0xd0 [ 4.185891] EAX: f5ff98e0 EBX: f5ff98e0 ECX: 00000000 EDX: 00000001 [ 4.185970] ESI: f5ff977c EDI: f5ff9904 EBP: f644fe98 ESP: f644fe94 [ 4.186049] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 4.186125] Process kworker/0:0 (pid: 4, ti=f644e000 task=f644c0e0 task.ti=f644e000) [ 4.186219] Stack: [ 4.186257] f5ff98e0 f644feb0 c1365173 00000282 f5ff9460 f5ff96e0 f5ff96e0 f644feec [ 4.186418] c1355bd8 f644c0e0 c1499c3d f5ff96e0 f644fed4 00000006 f5ff96e0 00000286 [ 4.186579] f644fedc c107922b f644feec 00000286 f5ff9460 f5ff9700 f644ff10 c135839e [ 4.186739] Call Trace: [ 4.186802] [] sdhci_set_ios+0x1c3/0x340 [ 4.186883] [] mmc_gate_clock+0x68/0x120 [ 4.186963] [] ? _raw_spin_unlock_irqrestore+0x4d/0x60 [ 4.187052] [] ? trace_hardirqs_on+0xb/0x10 [ 4.187134] [] mmc_host_clk_gate_delayed+0xbe/0x130 [ 4.187219] [] ? process_one_work+0xf9/0x5b0 [ 4.187300] [] mmc_host_clk_gate_work+0xd/0x10 [ 4.187379] [] process_one_work+0x172/0x5b0 [ 4.187457] [] ? process_one_work+0xf9/0x5b0 [ 4.187538] [] ? mmc_host_clk_gate_delayed+0x130/0x130 [ 4.187625] [] worker_thread+0x118/0x330 [ 4.187700] [] ? preempt_schedule+0x2e/0x50 [ 4.187779] [] ? rescuer_thread+0x1f0/0x1f0 [ 4.187857] [] kthread+0x74/0x80 [ 4.187931] [] ? __init_kthread_worker+0x60/0x60 [ 4.188015] [] kernel_thread_helper+0x6/0xd [ 4.188079] Code: 81 fa 00 00 04 00 0f 84 a7 00 00 00 7f 21 81 fa 80 00 00 00 0f 84 92 00 00 00 81 fa 00 00 0 [ 4.188780] EIP: [] sdhci_set_power+0x3e/0xd0 SS:ESP 0068:f644fe94 [ 4.188898] ---[ end trace a7b23eecc71777e4 ]--- This BUG() comes from the fact that ios.power_mode was still in previous value (MMC_POWER_ON) and ios.vdd was set to zero. We prevent these by inhibiting the clock gating while we update the ios structure. Both problems can be reproduced by simply running the device in a reboot loop. Signed-off-by: Mika Westerberg Reviewed-by: Linus Walleij Tested-by: Chris Ball Cc: Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 63ffc65f84af..b27b94078c21 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -728,15 +728,17 @@ static inline void mmc_set_ios(struct mmc_host *host) */ void mmc_set_chip_select(struct mmc_host *host, int mode) { + mmc_host_clk_hold(host); host->ios.chip_select = mode; mmc_set_ios(host); + mmc_host_clk_release(host); } /* * Sets the host clock to the highest possible frequency that * is below "hz". */ -void mmc_set_clock(struct mmc_host *host, unsigned int hz) +static void __mmc_set_clock(struct mmc_host *host, unsigned int hz) { WARN_ON(hz < host->f_min); @@ -747,6 +749,13 @@ void mmc_set_clock(struct mmc_host *host, unsigned int hz) mmc_set_ios(host); } +void mmc_set_clock(struct mmc_host *host, unsigned int hz) +{ + mmc_host_clk_hold(host); + __mmc_set_clock(host, hz); + mmc_host_clk_release(host); +} + #ifdef CONFIG_MMC_CLKGATE /* * This gates the clock by setting it to 0 Hz. @@ -779,7 +788,7 @@ void mmc_ungate_clock(struct mmc_host *host) if (host->clk_old) { BUG_ON(host->ios.clock); /* This call will also set host->clk_gated to false */ - mmc_set_clock(host, host->clk_old); + __mmc_set_clock(host, host->clk_old); } } @@ -807,8 +816,10 @@ void mmc_set_ungated(struct mmc_host *host) */ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode) { + mmc_host_clk_hold(host); host->ios.bus_mode = mode; mmc_set_ios(host); + mmc_host_clk_release(host); } /* @@ -816,8 +827,10 @@ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode) */ void mmc_set_bus_width(struct mmc_host *host, unsigned int width) { + mmc_host_clk_hold(host); host->ios.bus_width = width; mmc_set_ios(host); + mmc_host_clk_release(host); } /** @@ -1015,8 +1028,10 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) ocr &= 3 << bit; + mmc_host_clk_hold(host); host->ios.vdd = bit; mmc_set_ios(host); + mmc_host_clk_release(host); } else { pr_warning("%s: host doesn't support card's voltages\n", mmc_hostname(host)); @@ -1063,8 +1078,10 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11 */ void mmc_set_timing(struct mmc_host *host, unsigned int timing) { + mmc_host_clk_hold(host); host->ios.timing = timing; mmc_set_ios(host); + mmc_host_clk_release(host); } /* @@ -1072,8 +1089,10 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing) */ void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type) { + mmc_host_clk_hold(host); host->ios.drv_type = drv_type; mmc_set_ios(host); + mmc_host_clk_release(host); } /* @@ -1091,6 +1110,8 @@ static void mmc_power_up(struct mmc_host *host) { int bit; + mmc_host_clk_hold(host); + /* If ocr is set, we use it */ if (host->ocr) bit = ffs(host->ocr) - 1; @@ -1126,10 +1147,14 @@ static void mmc_power_up(struct mmc_host *host) * time required to reach a stable voltage. */ mmc_delay(10); + + mmc_host_clk_release(host); } static void mmc_power_off(struct mmc_host *host) { + mmc_host_clk_hold(host); + host->ios.clock = 0; host->ios.vdd = 0; @@ -1147,6 +1172,8 @@ static void mmc_power_off(struct mmc_host *host) host->ios.bus_width = MMC_BUS_WIDTH_1; host->ios.timing = MMC_TIMING_LEGACY; mmc_set_ios(host); + + mmc_host_clk_release(host); } /* From 50a50f9248497484c678631a9c1a719f1aaeab79 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 18 Aug 2011 15:23:49 +0300 Subject: [PATCH 28/77] mmc: core: use non-reentrant workqueue for clock gating The default multithread workqueue can cause the same work to be executed concurrently on a different CPUs. This isn't really suitable for clock gating as it might already gated the clock and gating it twice results both host->clk_old and host->ios.clock to be set to 0. To prevent this from happening we use system_nrt_wq instead. Signed-off-by: Mika Westerberg Reviewed-by: Linus Walleij Tested-by: Chris Ball Cc: Signed-off-by: Chris Ball --- drivers/mmc/core/host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 96a26b2bf5f0..793d0a0dad8d 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -179,7 +179,7 @@ void mmc_host_clk_release(struct mmc_host *host) host->clk_requests--; if (mmc_host_may_gate_card(host->card) && !host->clk_requests) - schedule_work(&host->clk_gate_work); + queue_work(system_nrt_wq, &host->clk_gate_work); spin_unlock_irqrestore(&host->clk_lock, flags); } From b91df1593e361109f1fe665ce17c5e87ca60582b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 19 Aug 2011 10:07:07 +0900 Subject: [PATCH 29/77] mmc: sdhi: initialise mmc_data->flags before use This corrects a logic error that I introduced in "mmc: sdhi: Add write16_hook" Reported-by: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Chris Ball --- drivers/mmc/host/sh_mobile_sdhi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 774f6439d7ce..0c4a672f5db6 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -120,11 +120,11 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->hclk = clk_get_rate(priv->clk); mmc_data->set_pwr = sh_mobile_sdhi_set_pwr; mmc_data->get_cd = sh_mobile_sdhi_get_cd; - if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT) - mmc_data->write16_hook = sh_mobile_sdhi_write16_hook; mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED; if (p) { mmc_data->flags = p->tmio_flags; + if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT) + mmc_data->write16_hook = sh_mobile_sdhi_write16_hook; mmc_data->ocr_mask = p->tmio_ocr_mask; mmc_data->capabilities |= p->tmio_caps; From 93c712f99d8e412b2d297edfe9f59b90636897c1 Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Tue, 9 Aug 2011 12:19:31 +0530 Subject: [PATCH 30/77] mmc: sd: UHS-I bus speed should be set last in UHS initialization mmc_sd_init_uhs_card function sets the driver type, current limit and bus speed mode on card as well as on host controller side. Currently bus speed mode is set by sending CMD6 to card and immediately setting the timing mode in host controller. But then before initiating tuning sequence, it also tries to set current limit by sending CMD6 to card which results in data timeout errors in controller if bus speed mode is SDR50/SDR104 mode. So basically bus speed mode should be set only after current limit is set in the card and immediately after setting the bus speed mode, tuning sequence should be initiated. Signed-off-by: Subhash Jadavani Reviewed-by: Arindam Nath Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 81 ++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 633975ff2bb3..0370e03e3142 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -469,56 +469,75 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status) return 0; } -static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status) +static void sd_update_bus_speed_mode(struct mmc_card *card) { - unsigned int bus_speed = 0, timing = 0; - int err; - /* * If the host doesn't support any of the UHS-I modes, fallback on * default speed. */ if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | - MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))) - return 0; + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))) { + card->sd_bus_speed = 0; + return; + } if ((card->host->caps & MMC_CAP_UHS_SDR104) && (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) { - bus_speed = UHS_SDR104_BUS_SPEED; - timing = MMC_TIMING_UHS_SDR104; - card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR; + card->sd_bus_speed = UHS_SDR104_BUS_SPEED; } else if ((card->host->caps & MMC_CAP_UHS_DDR50) && (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) { - bus_speed = UHS_DDR50_BUS_SPEED; - timing = MMC_TIMING_UHS_DDR50; - card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR; + card->sd_bus_speed = UHS_DDR50_BUS_SPEED; } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50)) { - bus_speed = UHS_SDR50_BUS_SPEED; - timing = MMC_TIMING_UHS_SDR50; - card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR; + card->sd_bus_speed = UHS_SDR50_BUS_SPEED; } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) && (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) { - bus_speed = UHS_SDR25_BUS_SPEED; - timing = MMC_TIMING_UHS_SDR25; - card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR; + card->sd_bus_speed = UHS_SDR25_BUS_SPEED; } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR12)) { - bus_speed = UHS_SDR12_BUS_SPEED; - timing = MMC_TIMING_UHS_SDR12; - card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR; + card->sd_bus_speed = UHS_SDR12_BUS_SPEED; + } +} + +static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status) +{ + int err; + unsigned int timing = 0; + + switch (card->sd_bus_speed) { + case UHS_SDR104_BUS_SPEED: + timing = MMC_TIMING_UHS_SDR104; + card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR; + break; + case UHS_DDR50_BUS_SPEED: + timing = MMC_TIMING_UHS_DDR50; + card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR; + break; + case UHS_SDR50_BUS_SPEED: + timing = MMC_TIMING_UHS_SDR50; + card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR; + break; + case UHS_SDR25_BUS_SPEED: + timing = MMC_TIMING_UHS_SDR25; + card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR; + break; + case UHS_SDR12_BUS_SPEED: + timing = MMC_TIMING_UHS_SDR12; + card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR; + break; + default: + return 0; } - card->sd_bus_speed = bus_speed; - err = mmc_sd_switch(card, 1, 0, bus_speed, status); + err = mmc_sd_switch(card, 1, 0, card->sd_bus_speed, status); if (err) return err; - if ((status[16] & 0xF) != bus_speed) + if ((status[16] & 0xF) != card->sd_bus_speed) printk(KERN_WARNING "%s: Problem setting bus speed mode!\n", mmc_hostname(card->host)); else { @@ -618,18 +637,24 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4); } + /* + * Select the bus speed mode depending on host + * and card capability. + */ + sd_update_bus_speed_mode(card); + /* Set the driver strength for the card */ err = sd_select_driver_type(card, status); if (err) goto out; - /* Set bus speed mode of the card */ - err = sd_set_bus_speed_mode(card, status); + /* Set current limit for the card */ + err = sd_set_current_limit(card, status); if (err) goto out; - /* Set current limit for the card */ - err = sd_set_current_limit(card, status); + /* Set bus speed mode of the card */ + err = sd_set_bus_speed_mode(card, status); if (err) goto out; From 49bb1e619568ec84785ceb366f07db2a6f0b64cc Mon Sep 17 00:00:00 2001 From: Girish K S Date: Fri, 26 Aug 2011 14:58:18 +0530 Subject: [PATCH 31/77] mmc: sdhci-s3c: Fix mmc card I/O problem This patch fixes the problem in sdhci-s3c host driver for Samsung Soc's. During the card identification stage the mmc core driver enumerates for the best bus width in combination with the highest available data rate. It starts enumerating from the highest bus width (8) to lowest width (1). In case of few MMC cards the 4-bit bus enumeration fails and tries the 1-bit bus enumeration. When switched to 1-bit bus mode the host driver has to clear the previous bus width setting and apply the new setting. The current patch will clear the previous bus mode and apply the new mode setting. Signed-off-by: Girish K S Acked-by: Jaehoon Chung Cc: Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-s3c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 2bd7bf4fece7..fe886d6c474a 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -302,6 +302,8 @@ static int sdhci_s3c_platform_8bit_width(struct sdhci_host *host, int width) ctrl &= ~SDHCI_CTRL_8BITBUS; break; default: + ctrl &= ~SDHCI_CTRL_4BITBUS; + ctrl &= ~SDHCI_CTRL_8BITBUS; break; } From aca420bc51f48b0701963ba3a6234442a0cabebd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 31 Aug 2011 14:45:53 -0700 Subject: [PATCH 32/77] libceph: fix leak of osd structs during shutdown We want to remove all OSDs, not just those on the idle LRU. Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ce310eee708d..16836a7df7a6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -685,6 +685,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) put_osd(osd); } +static void remove_all_osds(struct ceph_osd_client *osdc) +{ + dout("__remove_old_osds %p\n", osdc); + mutex_lock(&osdc->request_mutex); + while (!RB_EMPTY_ROOT(&osdc->osds)) { + struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), + struct ceph_osd, o_node); + __remove_osd(osdc, osd); + } + mutex_unlock(&osdc->request_mutex); +} + static void __move_osd_to_lru(struct ceph_osd_client *osdc, struct ceph_osd *osd) { @@ -701,14 +713,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd) list_del_init(&osd->o_osd_lru); } -static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) +static void remove_old_osds(struct ceph_osd_client *osdc) { struct ceph_osd *osd, *nosd; dout("__remove_old_osds %p\n", osdc); mutex_lock(&osdc->request_mutex); list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { - if (!remove_all && time_before(jiffies, osd->lru_ttl)) + if (time_before(jiffies, osd->lru_ttl)) break; __remove_osd(osdc, osd); } @@ -751,6 +763,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) struct rb_node *parent = NULL; struct ceph_osd *osd = NULL; + dout("__insert_osd %p osd%d\n", new, new->o_osd); while (*p) { parent = *p; osd = rb_entry(parent, struct ceph_osd, o_node); @@ -1144,7 +1157,7 @@ static void handle_osds_timeout(struct work_struct *work) dout("osds timeout\n"); down_read(&osdc->map_sem); - remove_old_osds(osdc, 0); + remove_old_osds(osdc); up_read(&osdc->map_sem); schedule_delayed_work(&osdc->osds_timeout_work, @@ -1862,8 +1875,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = NULL; } - remove_old_osds(osdc, 1); - WARN_ON(!RB_EMPTY_ROOT(&osdc->osds)); + remove_all_osds(osdc); mempool_destroy(osdc->req_mempool); ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); From d312ae878b6aed3912e1acaaf5d0b2a9d08a4f11 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 19 Aug 2011 15:57:16 +0100 Subject: [PATCH 33/77] xen: use maximum reservation to limit amount of usable RAM Use the domain's maximum reservation to limit the amount of extra RAM for the memory balloon. This reduces the size of the pages tables and the amount of reserved low memory (which defaults to about 1/32 of the total RAM). On a system with 8 GiB of RAM with the domain limited to 1 GiB the kernel reports: Before: Memory: 627792k/4472000k available After: Memory: 549740k/11132224k available A increase of about 76 MiB (~1.5% of the unused 7 GiB). The reserved low memory is also reduced from 253 MiB to 32 MiB. The total additional usable RAM is 329 MiB. For dom0, this requires at patch to Xen ('x86: use 'dom0_mem' to limit the number of pages for dom0') (c/s 23790) CC: stable@kernel.org Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 02ffd9e48c9f..ff3dfa176814 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -183,6 +183,19 @@ static unsigned long __init xen_set_identity(const struct e820entry *list, PFN_UP(start_pci), PFN_DOWN(last)); return identity; } + +static unsigned long __init xen_get_max_pages(void) +{ + unsigned long max_pages = MAX_DOMAIN_PAGES; + domid_t domid = DOMID_SELF; + int ret; + + ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); + if (ret > 0) + max_pages = ret; + return min(max_pages, MAX_DOMAIN_PAGES); +} + /** * machine_specific_memory_setup - Hook for machine specific memory setup. **/ @@ -291,6 +304,12 @@ char * __init xen_memory_setup(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + extra_limit = xen_get_max_pages(); + if (extra_limit >= max_pfn) + extra_pages = extra_limit - max_pfn; + else + extra_pages = 0; + extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); /* From d198d499148a0c64a41b3aba9e7dd43772832b91 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Thu, 1 Sep 2011 13:46:55 +0200 Subject: [PATCH 34/77] xen: x86_32: do not enable iterrupts when returning from exception in interrupt context If vmalloc page_fault happens inside of interrupt handler with interrupts disabled then on exit path from exception handler when there is no pending interrupts, the following code (arch/x86/xen/xen-asm_32.S:112): cmpw $0x0001, XEN_vcpu_info_pending(%eax) sete XEN_vcpu_info_mask(%eax) will enable interrupts even if they has been previously disabled according to eflags from the bounce frame (arch/x86/xen/xen-asm_32.S:99) testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) setz XEN_vcpu_info_mask(%eax) Solution is in setting XEN_vcpu_info_mask only when it should be set according to cmpw $0x0001, XEN_vcpu_info_pending(%eax) but not clearing it if there isn't any pending events. Reproducer for bug is attached to RHBZ 707552 CC: stable@kernel.org Signed-off-by: Igor Mammedov Acked-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/xen-asm_32.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 22a2093b5862..b040b0e518ca 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -113,11 +113,13 @@ xen_iret_start_crit: /* * If there's something pending, mask events again so we can - * jump back into xen_hypervisor_callback + * jump back into xen_hypervisor_callback. Otherwise do not + * touch XEN_vcpu_info_mask. */ - sete XEN_vcpu_info_mask(%eax) + jne 1f + movb $1, XEN_vcpu_info_mask(%eax) - popl %eax +1: popl %eax /* * From this point on the registers are restored and the stack From ed467e69f16e6b480e2face7bc5963834d025f91 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 1 Sep 2011 09:48:27 -0400 Subject: [PATCH 35/77] xen/smp: Warn user why they keel over - nosmp or noapic and what to use instead. We have hit a couple of customer bugs where they would like to use those parameters to run an UP kernel - but both of those options turn of important sources of interrupt information so we end up not being able to boot. The correct way is to pass in 'dom0_max_vcpus=1' on the Xen hypervisor line and the kernel will patch itself to be a UP kernel. Fixes bug: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=637308 CC: stable@kernel.org Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index e79dbb95482b..d4fc6d454f8d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -32,6 +32,7 @@ #include #include +#include #include "xen-ops.h" #include "mmu.h" @@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) unsigned cpu; unsigned int i; + if (skip_ioapic_setup) { + char *m = (max_cpus == 0) ? + "The nosmp parameter is incompatible with Xen; " \ + "use Xen dom0_max_vcpus=1 parameter" : + "The noapic parameter is incompatible with Xen"; + + xen_raw_printk(m); + panic(m); + } xen_init_lock_cpu(0); smp_store_cpu_info(0); From f1ca1512e765337a7c09eb875eedef8ea4e07654 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Sep 2011 14:10:32 +0200 Subject: [PATCH 36/77] iommu/amd: Make sure iommu->need_sync contains correct value The value is only set to true but never set back to false, which causes to many completion-wait commands to be sent to hardware. Fix it with this patch. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index a14f8dc23462..45652231dae8 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -605,7 +605,9 @@ static void build_inv_all(struct iommu_cmd *cmd) * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. */ -static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) +static int iommu_queue_command_sync(struct amd_iommu *iommu, + struct iommu_cmd *cmd, + bool sync) { u32 left, tail, head, next_tail; unsigned long flags; @@ -639,13 +641,18 @@ again: copy_cmd_to_buffer(iommu, cmd, tail); /* We need to sync now to make sure all commands are processed */ - iommu->need_sync = true; + iommu->need_sync = sync; spin_unlock_irqrestore(&iommu->lock, flags); return 0; } +static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) +{ + return iommu_queue_command_sync(iommu, cmd, true); +} + /* * This function queues a completion wait command into the command * buffer of an IOMMU @@ -661,7 +668,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) build_completion_wait(&cmd, (u64)&sem); - ret = iommu_queue_command(iommu, &cmd); + ret = iommu_queue_command_sync(iommu, &cmd, false); if (ret) return ret; From e33acde91140f1809952d1c135c36feb66a51887 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Sep 2011 14:19:50 +0200 Subject: [PATCH 37/77] iommu/amd: Don't take domain->lock recursivly The domain_flush_devices() function takes the domain->lock. But this function is only called from update_domain() which itself is already called unter the domain->lock. This causes a deadlock situation when the dma-address-space of a domain grows larger than 1GB. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 45652231dae8..0e4227f457af 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -847,14 +847,9 @@ static void domain_flush_complete(struct protection_domain *domain) static void domain_flush_devices(struct protection_domain *domain) { struct iommu_dev_data *dev_data; - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(dev_data, &domain->dev_list, list) device_flush_dte(dev_data); - - spin_unlock_irqrestore(&domain->lock, flags); } /**************************************************************************** From fb492c9160f3d40d09456a79cc669fba74d7d9cc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 Aug 2011 17:45:10 +0100 Subject: [PATCH 38/77] ARM: 7067/1: mm: keep significant bits in pfn_valid When ARCH_HAS_HOLES_MEMORYMODEL is selected, pfn_valid calls memblock_is_memory to test validity of a pfn: > memblock_is_memory(pfn << PAGE_SHIFT); On LPAE systems this cuts off the top bits, as the shift occurs before the value is promoted to a phys_addr_t. This patch replaces the shift with a call to __pfn_to_phys (which casts pfn to phys_addr_t before shifting), preventing the loss of significant bits. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 91bca355cd31..cc7e2d8be9aa 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -298,7 +298,7 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return memblock_is_memory(__pfn_to_phys(pfn)); } EXPORT_SYMBOL(pfn_valid); #endif From 1df726ef0a700587a712a3660b2caa8e533c7de9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 5 Sep 2011 08:58:29 +0100 Subject: [PATCH 39/77] NET: am79c961: fix race in link status code The link status code operates from a timer, and writes the index register without first taking a lock. A well-placed interrupt between writing the index register and reading the data register could change the index register on us, which will return wrong data. Add the necessary lock. Signed-off-by: Russell King --- drivers/net/arm/am79c961a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c index 52fe21e1e2cd..3b1416e3d217 100644 --- a/drivers/net/arm/am79c961a.c +++ b/drivers/net/arm/am79c961a.c @@ -308,8 +308,11 @@ static void am79c961_timer(unsigned long data) struct net_device *dev = (struct net_device *)data; struct dev_priv *priv = netdev_priv(dev); unsigned int lnkstat, carrier; + unsigned long flags; + spin_lock_irqsave(&priv->chip_lock, flags); lnkstat = read_ireg(dev->base_addr, ISALED0) & ISALED0_LNKST; + spin_unlock_irqrestore(&priv->chip_lock, flags); carrier = netif_carrier_ok(dev); if (lnkstat && !carrier) { From da063d260969c4e5e5f91d911ba87f7f6b48ead0 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Mon, 29 Aug 2011 13:33:32 +0200 Subject: [PATCH 40/77] dmaengine/ste_dma40: add missing kernel doc for pending_queue Signed-off-by: Per Forlin Acked-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index cd3a7c726bf8..486b6c0b44e3 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -174,6 +174,7 @@ struct d40_base; * @tasklet: Tasklet that gets scheduled from interrupt context to complete a * transfer and call client callback. * @client: Cliented owned descriptor list. + * @pending_queue: Submitted jobs, to be issued by issue_pending() * @active: Active descriptor. * @queue: Queued jobs. * @dma_cfg: The client configuration of this dma channel. From 3b3d5b0f855b3eec45a02832e97c3c1890ff8823 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Mon, 29 Aug 2011 13:33:33 +0200 Subject: [PATCH 41/77] dmaengine/ste_dma40: remove duplicate call to d40_pool_lli_free(). d40_desc_free() already calls d40_pool_lli_free(). Signed-off-by: Per Forlin Acked-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 486b6c0b44e3..37388d10497a 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -478,7 +478,6 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c) list_for_each_entry_safe(d, _d, &d40c->client, node) if (async_tx_test_ack(&d->txd)) { - d40_pool_lli_free(d40c, d); d40_desc_remove(d); desc = d; memset(desc, 0, sizeof(*desc)); @@ -1209,7 +1208,6 @@ static void dma_tasklet(unsigned long data) if (!d40d->cyclic) { if (async_tx_test_ack(&d40d->txd)) { - d40_pool_lli_free(d40c, d40d); d40_desc_remove(d40d); d40_desc_free(d40c, d40d); } else { @@ -1606,7 +1604,6 @@ static int d40_free_dma(struct d40_chan *d40c) /* Release client owned descriptors */ if (!list_empty(&d40c->client)) list_for_each_entry_safe(d, _d, &d40c->client, node) { - d40_pool_lli_free(d40c, d); d40_desc_remove(d); d40_desc_free(d40c, d); } From 7404368c22b4910ab839238e48d96be45180f6fc Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Mon, 29 Aug 2011 13:33:34 +0200 Subject: [PATCH 42/77] dmaengine/ste_dma40: fix Oops due to double free of client descriptor The client list may exist in two lists at the same time. This makes free fail since the same desc is freed multiple times. Remove desc from client list when adding it to the pending queue. Move free of client owned descriptors from free_dma() to terminate_all(). Unable to handle kernel paging request at virtual address 00100104 pgd = dea8c000 [00100104] *pgd=1ea62831, *pte=00000000, *ppte=00000000 Internal error: Oops: 817 [#1] PREEMPT SMP Modules linked in: CPU: 0 Not tainted (3.1.0-rc3+ #58) PC is at d40_free_chan_resources+0x64/0x330 Signed-off-by: Per Forlin Acked-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 37388d10497a..92ec0a26401a 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -644,8 +644,11 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) return d; } +/* remove desc from current queue and add it to the pending_queue */ static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) { + d40_desc_remove(desc); + desc->is_in_client_list = false; list_add_tail(&desc->node, &d40c->pending_queue); } @@ -803,6 +806,7 @@ done: static void d40_term_all(struct d40_chan *d40c) { struct d40_desc *d40d; + struct d40_desc *_d; /* Release active descriptors */ while ((d40d = d40_first_active_get(d40c))) { @@ -822,6 +826,14 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } + /* Release client owned descriptors */ + if (!list_empty(&d40c->client)) + list_for_each_entry_safe(d40d, _d, &d40c->client, node) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + d40c->pending_tx = 0; d40c->busy = false; } @@ -1594,20 +1606,10 @@ static int d40_free_dma(struct d40_chan *d40c) u32 event; struct d40_phy_res *phy = d40c->phy_chan; bool is_src; - struct d40_desc *d; - struct d40_desc *_d; - /* Terminate all queued and active transfers */ d40_term_all(d40c); - /* Release client owned descriptors */ - if (!list_empty(&d40c->client)) - list_for_each_entry_safe(d, _d, &d40c->client, node) { - d40_desc_remove(d); - d40_desc_free(d40c, d); - } - if (phy == NULL) { chan_err(d40c, "phy == null\n"); return -EINVAL; From 82babbb361f207a80cffa8ac34c2b6a0b62acc88 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Mon, 29 Aug 2011 13:33:35 +0200 Subject: [PATCH 43/77] dmaengine/ste_dma40: fix memory leak due to prepared descriptors Prepared descriptors that are not submitted will not be freed. Add prepared descriptor to a list to be able to release them upon dmaengine_terminate_all(). Signed-off-by: Per Forlin Acked-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 92ec0a26401a..467e4dcb20a0 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -177,6 +177,7 @@ struct d40_base; * @pending_queue: Submitted jobs, to be issued by issue_pending() * @active: Active descriptor. * @queue: Queued jobs. + * @prepare_queue: Prepared jobs. * @dma_cfg: The client configuration of this dma channel. * @configured: whether the dma_cfg configuration is valid * @base: Pointer to the device instance struct. @@ -204,6 +205,7 @@ struct d40_chan { struct list_head pending_queue; struct list_head active; struct list_head queue; + struct list_head prepare_queue; struct stedma40_chan_cfg dma_cfg; bool configured; struct d40_base *base; @@ -833,6 +835,13 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } + /* Release descriptors in prepare queue */ + if (!list_empty(&d40c->prepare_queue)) + list_for_each_entry_safe(d40d, _d, + &d40c->prepare_queue, node) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } d40c->pending_tx = 0; d40c->busy = false; @@ -1911,6 +1920,12 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, goto err; } + /* + * add descriptor to the prepare queue in order to be able + * to free them later in terminate_all + */ + list_add_tail(&desc->node, &chan->prepare_queue); + spin_unlock_irqrestore(&chan->lock, flags); return &desc->txd; @@ -2400,6 +2415,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, INIT_LIST_HEAD(&d40c->queue); INIT_LIST_HEAD(&d40c->pending_queue); INIT_LIST_HEAD(&d40c->client); + INIT_LIST_HEAD(&d40c->prepare_queue); tasklet_init(&d40c->tasklet, dma_tasklet, (unsigned long) d40c); From 5204f5e3f5b3c706e52682590de5974a82ea54f9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 5 Sep 2011 08:07:47 -0700 Subject: [PATCH 44/77] regmap: Remove bitrotted module_put()s The conversion to per bus type registration functions means we don't need to do module_get()s to hold the bus types in memory (their users will link to them) so we removed all those calls. This left module_put() calls in the cleanup paths which aren't needed and which cause unbalanced puts if we ever try to unload anything. Reported-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 0eef4da1ac61..20663f8dae45 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -168,13 +168,11 @@ struct regmap *regmap_init(struct device *dev, map->work_buf = kmalloc(map->format.buf_size, GFP_KERNEL); if (map->work_buf == NULL) { ret = -ENOMEM; - goto err_bus; + goto err_map; } return map; -err_bus: - module_put(map->bus->owner); err_map: kfree(map); err: @@ -188,7 +186,6 @@ EXPORT_SYMBOL_GPL(regmap_init); void regmap_exit(struct regmap *map) { kfree(map->work_buf); - module_put(map->bus->owner); kfree(map); } EXPORT_SYMBOL_GPL(regmap_exit); From 5441ae5eb3614d3c28f77073370738a2820c88e4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Jul 2011 18:06:32 +0000 Subject: [PATCH 45/77] fs/9p: Add fid before dentry instantiation d_instantiate marks the dentry positive. So a parallel lookup and mkdir of the directory can find dentry that doesn't have fid attached. This can result in both the code path doing v9fs_fid_add which results in v9fs_dentry leak. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 4 +--- fs/9p/vfs_inode_dotl.c | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8bb5507e822f..43dd540663af 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -645,13 +645,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; - + d_instantiate(dentry, inode); return ofid; - error: if (ofid) p9_client_clunk(ofid); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index b6c8ed205192..0ca224c8bb60 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -281,10 +281,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); /* Now set the ACL based on the default value */ v9fs_set_create_acl(dentry, &dacl, &pacl); @@ -403,10 +403,10 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); fid = NULL; } else { /* @@ -657,10 +657,10 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); fid = NULL; } else { /* Not in cached mode. No need to populate inode with stat */ @@ -810,10 +810,10 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); fid = NULL; } else { /* From 45089142b1497dab2327d60f6c71c40766fc3ea4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Jul 2011 18:06:33 +0000 Subject: [PATCH 46/77] fs/9p: Don't update file type when updating file attributes We should only update attributes that we can change on stat2inode. Also do file type initialization in v9fs_init_inode. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 4 +- fs/9p/vfs_inode.c | 91 +++++++++++++++++++++++------------------- fs/9p/vfs_inode_dotl.c | 23 +++++++---- fs/9p/vfs_super.c | 2 +- 4 files changed, 68 insertions(+), 52 deletions(-) diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 46ce357ca1ab..7ac1faec2bde 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache; struct inode *v9fs_alloc_inode(struct super_block *sb); void v9fs_destroy_inode(struct inode *inode); -struct inode *v9fs_get_inode(struct super_block *sb, int mode); +struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t); int v9fs_init_inode(struct v9fs_session_info *v9ses, - struct inode *inode, int mode); + struct inode *inode, int mode, dev_t); void v9fs_evict_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 43dd540663af..3563cace0a2e 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) /** * p9mode2unixmode- convert plan9 mode bits to unix mode bits * @v9ses: v9fs session information - * @mode: mode to convert + * @stat: p9_wstat from which mode need to be derived + * @rdev: major number, minor number in case of device files. * */ - -static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) +static int p9mode2unixmode(struct v9fs_session_info *v9ses, + struct p9_wstat *stat, dev_t *rdev) { int res; + int mode = stat->mode; - res = mode & 0777; + res = mode & S_IALLUGO; + *rdev = 0; if ((mode & P9_DMDIR) == P9_DMDIR) res |= S_IFDIR; @@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) && (v9ses->nodev == 0)) res |= S_IFIFO; else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses)) - && (v9ses->nodev == 0)) - res |= S_IFBLK; - else + && (v9ses->nodev == 0)) { + char type = 0, ext[32]; + int major = -1, minor = -1; + + strncpy(ext, stat->extension, sizeof(ext)); + sscanf(ext, "%c %u %u", &type, &major, &minor); + switch (type) { + case 'c': + res |= S_IFCHR; + break; + case 'b': + res |= S_IFBLK; + break; + default: + P9_DPRINTK(P9_DEBUG_ERROR, + "Unknown special type %c %s\n", type, + stat->extension); + }; + *rdev = MKDEV(major, minor); + } else res |= S_IFREG; if (v9fs_proto_dotu(v9ses)) { @@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) if ((mode & P9_DMSETVTX) == P9_DMSETVTX) res |= S_ISVTX; } - return res; } @@ -242,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode) } int v9fs_init_inode(struct v9fs_session_info *v9ses, - struct inode *inode, int mode) + struct inode *inode, int mode, dev_t rdev) { int err = 0; inode_init_owner(inode, NULL, mode); inode->i_blocks = 0; - inode->i_rdev = 0; + inode->i_rdev = rdev; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->a_ops = &v9fs_addr_operations; @@ -335,7 +354,7 @@ error: * */ -struct inode *v9fs_get_inode(struct super_block *sb, int mode) +struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev) { int err; struct inode *inode; @@ -348,7 +367,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); return ERR_PTR(-ENOMEM); } - err = v9fs_init_inode(v9ses, inode, mode); + err = v9fs_init_inode(v9ses, inode, mode, rdev); if (err) { iput(inode); return ERR_PTR(err); @@ -435,11 +454,12 @@ void v9fs_evict_inode(struct inode *inode) static int v9fs_test_inode(struct inode *inode, void *data) { int umode; + dev_t rdev; struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_wstat *st = (struct p9_wstat *)data; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - umode = p9mode2unixmode(v9ses, st->mode); + umode = p9mode2unixmode(v9ses, st, &rdev); /* don't match inode of different type */ if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) return 0; @@ -473,6 +493,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, struct p9_wstat *st, int new) { + dev_t rdev; int retval, umode; unsigned long i_ino; struct inode *inode; @@ -496,8 +517,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, * later. */ inode->i_ino = i_ino; - umode = p9mode2unixmode(v9ses, st->mode); - retval = v9fs_init_inode(v9ses, inode, umode); + umode = p9mode2unixmode(v9ses, st, &rdev); + retval = v9fs_init_inode(v9ses, inode, umode, rdev); if (retval) goto error; @@ -1000,7 +1021,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, return PTR_ERR(st); v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); - generic_fillattr(dentry->d_inode, stat); + generic_fillattr(dentry->d_inode, stat); p9stat_free(st); kfree(st); @@ -1084,6 +1105,7 @@ void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, struct super_block *sb) { + mode_t mode; char ext[32]; char tag_name[14]; unsigned int i_nlink; @@ -1119,31 +1141,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, inode->i_nlink = i_nlink; } } - inode->i_mode = p9mode2unixmode(v9ses, stat->mode); - if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { - char type = 0; - int major = -1; - int minor = -1; - - strncpy(ext, stat->extension, sizeof(ext)); - sscanf(ext, "%c %u %u", &type, &major, &minor); - switch (type) { - case 'c': - inode->i_mode &= ~S_IFBLK; - inode->i_mode |= S_IFCHR; - break; - case 'b': - break; - default: - P9_DPRINTK(P9_DEBUG_ERROR, - "Unknown special type %c %s\n", type, - stat->extension); - }; - inode->i_rdev = MKDEV(major, minor); - init_special_inode(inode, inode->i_mode, inode->i_rdev); - } else - inode->i_rdev = 0; - + mode = stat->mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; + inode->i_mode = mode; i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... */ @@ -1409,6 +1409,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) { + int umode; + dev_t rdev; loff_t i_size; struct p9_wstat *st; struct v9fs_session_info *v9ses; @@ -1417,6 +1419,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) st = p9_client_stat(fid); if (IS_ERR(st)) return PTR_ERR(st); + /* + * Don't update inode if the file type is different + */ + umode = p9mode2unixmode(v9ses, st, &rdev); + if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) + goto out; spin_lock(&inode->i_lock); /* @@ -1428,6 +1436,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) if (v9ses->cache) inode->i_size = i_size; spin_unlock(&inode->i_lock); +out: p9stat_free(st); kfree(st); return 0; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0ca224c8bb60..a3f2540cc4b2 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -153,7 +153,8 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, * later. */ inode->i_ino = i_ino; - retval = v9fs_init_inode(v9ses, inode, st->st_mode); + retval = v9fs_init_inode(v9ses, inode, + st->st_mode, new_decode_dev(st->st_rdev)); if (retval) goto error; @@ -414,7 +415,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, * inode with stat. We need to get an inode * so that we can set the acl with dentry */ - inode = v9fs_get_inode(dir->i_sb, mode); + inode = v9fs_get_inode(dir->i_sb, mode, 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; @@ -540,6 +541,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) { + mode_t mode; struct v9fs_inode *v9inode = V9FS_I(inode); if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { @@ -552,11 +554,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) inode->i_uid = stat->st_uid; inode->i_gid = stat->st_gid; inode->i_nlink = stat->st_nlink; - inode->i_mode = stat->st_mode; - inode->i_rdev = new_decode_dev(stat->st_rdev); - if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) - init_special_inode(inode, inode->i_mode, inode->i_rdev); + mode = stat->st_mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; + inode->i_mode = mode; i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; @@ -664,7 +665,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, fid = NULL; } else { /* Not in cached mode. No need to populate inode with stat */ - inode = v9fs_get_inode(dir->i_sb, S_IFLNK); + inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; @@ -820,7 +821,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, * Not in cached mode. No need to populate inode with stat. * socket syscall returns a fd, so we need instantiate */ - inode = v9fs_get_inode(dir->i_sb, mode); + inode = v9fs_get_inode(dir->i_sb, mode, rdev); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; @@ -886,6 +887,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) st = p9_client_getattr_dotl(fid, P9_STATS_ALL); if (IS_ERR(st)) return PTR_ERR(st); + /* + * Don't update inode if the file type is different + */ + if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) + goto out; spin_lock(&inode->i_lock); /* @@ -897,6 +903,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) if (v9ses->cache) inode->i_size = i_size; spin_unlock(&inode->i_lock); +out: kfree(st); return 0; } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index feef6cdc1fd2..c70251d47ed1 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, else sb->s_d_op = &v9fs_dentry_operations; - inode = v9fs_get_inode(sb, S_IFDIR | mode); + inode = v9fs_get_inode(sb, S_IFDIR | mode, 0); if (IS_ERR(inode)) { retval = PTR_ERR(inode); goto release_sb; From b49d8b5d7007a673796f3f99688b46931293873e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Aug 2011 16:56:04 +0000 Subject: [PATCH 47/77] net/9p: Fix kernel crash with msize 512K With msize equal to 512K (PAGE_SIZE * VIRTQUEUE_NUM), we hit multiple crashes. This patch fix those. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 175b5135bdcf..e317583fcc73 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -263,7 +263,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { int in, out, inp, outp; struct virtio_chan *chan = client->trans; - char *rdata = (char *)req->rc+sizeof(struct p9_fcall); unsigned long flags; size_t pdata_off = 0; struct trans_rpage_info *rpinfo = NULL; @@ -346,7 +345,8 @@ req_retry_pinned: * Arrange in such a way that server places header in the * alloced memory and payload onto the user buffer. */ - inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); + inp = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, 11); /* * Running executables in the filesystem may result in * a read request with kernel buffer as opposed to user buffer. @@ -366,8 +366,8 @@ req_retry_pinned: } in += inp; } else { - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, - req->rc->capacity); + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, + req->rc->sdata, req->rc->capacity); } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); @@ -592,7 +592,14 @@ static struct p9_trans_module p9_virtio_trans = { .close = p9_virtio_close, .request = p9_virtio_request, .cancel = p9_virtio_cancel, - .maxsize = PAGE_SIZE*VIRTQUEUE_NUM, + + /* + * We leave one entry for input and one entry for response + * headers. We also skip one more entry to accomodate, address + * that are not at page boundary, that can result in an extra + * page in zero copy. + */ + .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, From f88657ce3f9713a0c62101dffb0e972a979e77b9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 3 Aug 2011 19:55:32 +0530 Subject: [PATCH 48/77] fs/9p: Add OS dependent open flags in 9p protocol Some of the flags are OS/arch dependent we add a 9p protocol value which maps to asm-generic/fcntl.h values in Linux Based on the original patch from Venkateswararao Jujjuri Signed-off-by: Aneesh Kumar K.V --- fs/9p/v9fs_vfs.h | 2 ++ fs/9p/vfs_file.c | 2 +- fs/9p/vfs_inode.c | 16 +++++++++++- fs/9p/vfs_inode_dotl.c | 55 +++++++++++++++++++++++++++++++++++++++++- include/net/9p/9p.h | 24 ++++++++++++++++++ 5 files changed, 96 insertions(+), 3 deletions(-) diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 7ac1faec2bde..410ffd6ceb5f 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -83,4 +83,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode) v9inode->cache_validity |= V9FS_INO_INVALID_ATTR; return; } + +int v9fs_open_to_dotl_flags(int flags); #endif diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 3c173fcc2c5a..c2f107583125 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9inode = V9FS_I(inode); v9ses = v9fs_inode2v9ses(inode); if (v9fs_proto_dotl(v9ses)) - omode = file->f_flags; + omode = v9fs_open_to_dotl_flags(file->f_flags); else omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses)); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 3563cace0a2e..9e3ea6ce6951 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -552,6 +552,19 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, return inode; } +/** + * v9fs_at_to_dotl_flags- convert Linux specific AT flags to + * plan 9 AT flag. + * @flags: flags to convert + */ +static int v9fs_at_to_dotl_flags(int flags) +{ + int rflags = 0; + if (flags & AT_REMOVEDIR) + rflags |= P9_DOTL_AT_REMOVEDIR; + return rflags; +} + /** * v9fs_remove - helper function to remove files and directories * @dir: directory inode that is being deleted @@ -579,7 +592,8 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) return retval; } if (v9fs_proto_dotl(v9ses)) - retval = p9_client_unlinkat(dfid, dentry->d_name.name, flags); + retval = p9_client_unlinkat(dfid, dentry->d_name.name, + v9fs_at_to_dotl_flags(flags)); if (retval == -EOPNOTSUPP) { /* Try the one based on path */ v9fid = v9fs_fid_clone(dentry); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a3f2540cc4b2..aded79fcd5cf 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -191,6 +191,58 @@ v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, return inode; } +struct dotl_openflag_map { + int open_flag; + int dotl_flag; +}; + +static int v9fs_mapped_dotl_flags(int flags) +{ + int i; + int rflags = 0; + struct dotl_openflag_map dotl_oflag_map[] = { + { O_CREAT, P9_DOTL_CREATE }, + { O_EXCL, P9_DOTL_EXCL }, + { O_NOCTTY, P9_DOTL_NOCTTY }, + { O_TRUNC, P9_DOTL_TRUNC }, + { O_APPEND, P9_DOTL_APPEND }, + { O_NONBLOCK, P9_DOTL_NONBLOCK }, + { O_DSYNC, P9_DOTL_DSYNC }, + { FASYNC, P9_DOTL_FASYNC }, + { O_DIRECT, P9_DOTL_DIRECT }, + { O_LARGEFILE, P9_DOTL_LARGEFILE }, + { O_DIRECTORY, P9_DOTL_DIRECTORY }, + { O_NOFOLLOW, P9_DOTL_NOFOLLOW }, + { O_NOATIME, P9_DOTL_NOATIME }, + { O_CLOEXEC, P9_DOTL_CLOEXEC }, + { O_SYNC, P9_DOTL_SYNC}, + }; + for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { + if (flags & dotl_oflag_map[i].open_flag) + rflags |= dotl_oflag_map[i].dotl_flag; + } + return rflags; +} + +/** + * v9fs_open_to_dotl_flags- convert Linux specific open flags to + * plan 9 open flag. + * @flags: flags to convert + */ +int v9fs_open_to_dotl_flags(int flags) +{ + int rflags = 0; + + /* + * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY + * and P9_DOTL_NOACCESS + */ + rflags |= flags & O_ACCMODE; + rflags |= v9fs_mapped_dotl_flags(flags); + + return rflags; +} + /** * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. * @dir: directory inode that is being created @@ -259,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, "Failed to get acl values in creat %d\n", err); goto error; } - err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid); + err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), + mode, gid, &qid); if (err < 0) { P9_DPRINTK(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n", diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 342dcf13d039..957ab99897a1 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -288,6 +288,30 @@ enum p9_perm_t { P9_DMSETVTX = 0x00010000, }; +/* 9p2000.L open flags */ +#define P9_DOTL_RDONLY 00000000 +#define P9_DOTL_WRONLY 00000001 +#define P9_DOTL_RDWR 00000002 +#define P9_DOTL_NOACCESS 00000003 +#define P9_DOTL_CREATE 00000100 +#define P9_DOTL_EXCL 00000200 +#define P9_DOTL_NOCTTY 00000400 +#define P9_DOTL_TRUNC 00001000 +#define P9_DOTL_APPEND 00002000 +#define P9_DOTL_NONBLOCK 00004000 +#define P9_DOTL_DSYNC 00010000 +#define P9_DOTL_FASYNC 00020000 +#define P9_DOTL_DIRECT 00040000 +#define P9_DOTL_LARGEFILE 00100000 +#define P9_DOTL_DIRECTORY 00200000 +#define P9_DOTL_NOFOLLOW 00400000 +#define P9_DOTL_NOATIME 01000000 +#define P9_DOTL_CLOEXEC 02000000 +#define P9_DOTL_SYNC 04000000 + +/* 9p2000.L at flags */ +#define P9_DOTL_AT_REMOVEDIR 0x200 + /** * enum p9_qid_t - QID types * @P9_QTDIR: directory From 73f507171cfa407b19f254aef95cbb058c8180cf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 16 Aug 2011 22:19:28 +0530 Subject: [PATCH 49/77] fs/9p: Always ask new inode in lookup for cache mode disabled This make sure we don't end up reusing the unlinked inode object. The ideal way is to use inode i_generation. But i_generation is not available in userspace always. Signed-off-by: Aneesh Kumar K.V --- fs/9p/vfs_inode.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 9e3ea6ce6951..e3c03db3c788 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -825,6 +825,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nameidata) { + struct dentry *res; struct super_block *sb; struct v9fs_session_info *v9ses; struct p9_fid *dfid, *fid; @@ -856,22 +857,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(result); } - - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + /* + * Make sure we don't use a wrong inode due to parallel + * unlink. For cached mode create calls request for new + * inode. But with cache disabled, lookup should do this. + */ + if (v9ses->cache) + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + else + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { result = PTR_ERR(inode); inode = NULL; goto error; } - result = v9fs_fid_add(dentry, fid); if (result < 0) goto error_iput; - inst_out: - d_add(dentry, inode); - return NULL; - + /* + * If we had a rename on the server and a parallel lookup + * for the new name, then make sure we instantiate with + * the new name. ie look up for a/b, while on server somebody + * moved b under k and client parallely did a lookup for + * k/b. + */ + res = d_materialise_unique(dentry, inode); + if (!IS_ERR(res)) + return res; + result = PTR_ERR(res); error_iput: iput(inode); error: From 51b8b4fb32271d39fbdd760397406177b2b0fd36 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sun, 21 Aug 2011 00:21:18 +0530 Subject: [PATCH 50/77] fs/9p: Use protocol-defined value for lock/getlock 'type' field. Signed-off-by: Jim Garlick Signed-off-by: Aneesh Kumar K.V --- fs/9p/vfs_file.c | 34 +++++++++++++++++++++++++++------- include/net/9p/9p.h | 5 +++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index c2f107583125..62857a810a79 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) /* convert posix lock to p9 tlock args */ memset(&flock, 0, sizeof(flock)); - flock.type = fl->fl_type; + /* map the lock type */ + switch (fl->fl_type) { + case F_RDLCK: + flock.type = P9_LOCK_TYPE_RDLCK; + break; + case F_WRLCK: + flock.type = P9_LOCK_TYPE_WRLCK; + break; + case F_UNLCK: + flock.type = P9_LOCK_TYPE_UNLCK; + break; + } flock.start = fl->fl_start; if (fl->fl_end == OFFSET_MAX) flock.length = 0; @@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) /* convert posix lock to p9 tgetlock args */ memset(&glock, 0, sizeof(glock)); - glock.type = fl->fl_type; + glock.type = P9_LOCK_TYPE_UNLCK; glock.start = fl->fl_start; if (fl->fl_end == OFFSET_MAX) glock.length = 0; @@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) res = p9_client_getlock_dotl(fid, &glock); if (res < 0) return res; - if (glock.type != F_UNLCK) { - fl->fl_type = glock.type; + /* map 9p lock type to os lock type */ + switch (glock.type) { + case P9_LOCK_TYPE_RDLCK: + fl->fl_type = F_RDLCK; + break; + case P9_LOCK_TYPE_WRLCK: + fl->fl_type = F_WRLCK; + break; + case P9_LOCK_TYPE_UNLCK: + fl->fl_type = F_UNLCK; + break; + } + if (glock.type != P9_LOCK_TYPE_UNLCK) { fl->fl_start = glock.start; if (glock.length == 0) fl->fl_end = OFFSET_MAX; else fl->fl_end = glock.start + glock.length - 1; fl->fl_pid = glock.proc_id; - } else - fl->fl_type = F_UNLCK; - + } return res; } diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 957ab99897a1..a6326ef8ade6 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -312,6 +312,11 @@ enum p9_perm_t { /* 9p2000.L at flags */ #define P9_DOTL_AT_REMOVEDIR 0x200 +/* 9p2000.L lock type */ +#define P9_LOCK_TYPE_RDLCK 0 +#define P9_LOCK_TYPE_WRLCK 1 +#define P9_LOCK_TYPE_UNLCK 2 + /** * enum p9_qid_t - QID types * @P9_QTDIR: directory From ff71c182f461da5ae9d2d65f8a63f5a9193b9be1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 28 Aug 2011 13:01:49 -0700 Subject: [PATCH 51/77] hwmon: (max16065) Fix current calculation Current calculation is completely wrong. Add missing brackets to fix it. Signed-off-by: Guenter Roeck Acked-by: Jean Delvare Cc: stable@kernel.org # 3.0+ --- drivers/hwmon/max16065.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c index d94a24fdf4ba..dd2d7b9620c2 100644 --- a/drivers/hwmon/max16065.c +++ b/drivers/hwmon/max16065.c @@ -124,7 +124,7 @@ static inline int MV_TO_LIMIT(int mv, int range) static inline int ADC_TO_CURR(int adc, int gain) { - return adc * 1400000 / gain * 255; + return adc * 1400000 / (gain * 255); } /* From d91aae1e52e5289a94f4ddff968decfc8d37271e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 29 Aug 2011 22:53:20 -0700 Subject: [PATCH 52/77] hwmon: (max16065) Add chip access warning to documentation The chips supported by the max16065 driver should not be accessed using direct i2ctools commands. Add warning to driver documentation to alert users. Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- Documentation/hwmon/max16065 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/hwmon/max16065 b/Documentation/hwmon/max16065 index 44b4f61e04f9..c11f64a1f2ad 100644 --- a/Documentation/hwmon/max16065 +++ b/Documentation/hwmon/max16065 @@ -62,6 +62,13 @@ can be safely used to identify the chip. You will have to instantiate the devices explicitly. Please see Documentation/i2c/instantiating-devices for details. +WARNING: Do not access chip registers using the i2cdump command, and do not use +any of the i2ctools commands on a command register (0xa5 to 0xac). The chips +supported by this driver interpret any access to a command register (including +read commands) as request to execute the command in question. This may result in +power loss, board resets, and/or Flash corruption. Worst case, your board may +turn into a brick. + Sysfs entries ------------- From f020b007d5dd24597f5e985a6309bcb8393797ed Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 31 Aug 2011 11:53:41 -0400 Subject: [PATCH 53/77] hwmon: (ucd9000/ucd9200) Optimize array walk Rewrite the loop walking the id array during probe. The new code is better adapted to a null-terminated array, and is also clearer and more efficient than the original. Signed-off-by: Jean Delvare Cc: Axel Lin Cc: Guenter Roeck Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ucd9000.c | 6 ++---- drivers/hwmon/pmbus/ucd9200.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index ace1c7319734..d0ddb60155c9 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -141,13 +141,11 @@ static int ucd9000_probe(struct i2c_client *client, block_buffer[ret] = '\0'; dev_info(&client->dev, "Device ID %s\n", block_buffer); - mid = NULL; - for (i = 0; i < ARRAY_SIZE(ucd9000_id); i++) { - mid = &ucd9000_id[i]; + for (mid = ucd9000_id; mid->name[0]; mid++) { if (!strncasecmp(mid->name, block_buffer, strlen(mid->name))) break; } - if (!mid || !strlen(mid->name)) { + if (!mid->name[0]) { dev_err(&client->dev, "Unsupported device\n"); return -ENODEV; } diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c index ffcc1cf3609d..c65e9da707cc 100644 --- a/drivers/hwmon/pmbus/ucd9200.c +++ b/drivers/hwmon/pmbus/ucd9200.c @@ -68,13 +68,11 @@ static int ucd9200_probe(struct i2c_client *client, block_buffer[ret] = '\0'; dev_info(&client->dev, "Device ID %s\n", block_buffer); - mid = NULL; - for (i = 0; i < ARRAY_SIZE(ucd9200_id); i++) { - mid = &ucd9200_id[i]; + for (mid = ucd9200_id; mid->name[0]; mid++) { if (!strncasecmp(mid->name, block_buffer, strlen(mid->name))) break; } - if (!mid || !strlen(mid->name)) { + if (!mid->name[0]) { dev_err(&client->dev, "Unsupported device\n"); return -ENODEV; } From 7a703aded97e01d7f4a6b8440a431117399666ba Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 30 Aug 2011 14:37:37 +0800 Subject: [PATCH 54/77] i2c-pxa2xx: return proper error code in ce4100_i2c_probe error paths Signed-off-by: Axel Lin Acked-by: Sebastian Andrzej Siewior Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-pxa-pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c index 6659d269b841..b73da6cd6f91 100644 --- a/drivers/i2c/busses/i2c-pxa-pci.c +++ b/drivers/i2c/busses/i2c-pxa-pci.c @@ -109,12 +109,15 @@ static int __devinit ce4100_i2c_probe(struct pci_dev *dev, return -EINVAL; } sds = kzalloc(sizeof(*sds), GFP_KERNEL); - if (!sds) + if (!sds) { + ret = -ENOMEM; goto err_mem; + } for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) { sds->pdev[i] = add_i2c_device(dev, i); if (IS_ERR(sds->pdev[i])) { + ret = PTR_ERR(sds->pdev[i]); while (--i >= 0) platform_device_unregister(sds->pdev[i]); goto err_dev_add; From 406bd18a7a39ef69f1d60a41d9de74932bcb98d4 Mon Sep 17 00:00:00 2001 From: John Bonesio Date: Tue, 30 Aug 2011 11:46:08 -0600 Subject: [PATCH 55/77] i2c-tegra: Add of_match_table This patch was intended to be part of 7ca2d1a105a239e300b937e9c41a10a4bd08f569 "i2c: Tegra: Add DeviceTree support". However, an early version of that patch, which was missing a chunk, was applied to next-i2c. This change is that missing chunk. Signed-off-by: John Bonesio Signed-off-by: Stephen Warren Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-tegra.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 2440b7411978..126b4f060231 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -719,6 +719,17 @@ static int tegra_i2c_resume(struct platform_device *pdev) } #endif +#if defined(CONFIG_OF) +/* Match table for of_platform binding */ +static const struct of_device_id tegra_i2c_of_match[] __devinitconst = { + { .compatible = "nvidia,tegra20-i2c", }, + {}, +}; +MODULE_DEVICE_TABLE(of, tegra_i2c_of_match); +#else +#define tegra_i2c_of_match NULL +#endif + static struct platform_driver tegra_i2c_driver = { .probe = tegra_i2c_probe, .remove = tegra_i2c_remove, @@ -729,6 +740,7 @@ static struct platform_driver tegra_i2c_driver = { .driver = { .name = "tegra-i2c", .owner = THIS_MODULE, + .of_match_table = tegra_i2c_of_match, }, }; From 048e29cff95168ea3a9f176e84cc0bae54d0ae64 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Aug 2011 11:46:09 -0600 Subject: [PATCH 56/77] i2c-tegra: add I2C_FUNC_SMBUS_EMUL Signed-off-by: Mike Rapoport Signed-off-by: Stephen Warren Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 126b4f060231..17ded1d2f11d 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -531,7 +531,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], static u32 tegra_i2c_func(struct i2c_adapter *adap) { - return I2C_FUNC_I2C; + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } static const struct i2c_algorithm tegra_i2c_algo = { From 96219c3a257cc8ba3b3cae67efdc88be37cf7c9d Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Tue, 30 Aug 2011 11:46:10 -0600 Subject: [PATCH 57/77] i2c-tegra: fix possible race condition after tx In tegra_i2c_fill_tx_fifo, once we have finished pushing all the bytes to the I2C hardware controller, the interrupt might happen before we have updated i2c_dev->msg_buf_remaining at the end of the function. Then, in tegra_i2c_isr, we will call again tegra_i2c_fill_tx_fifo triggering weird behaviour. This has been shown to happen under real conditions. Signed-off-by: Doug Anderson Tested-by: Vincent Palatin Acked-by: Rhyland Klein Acked-by: Stephen Warren Signed-off-by: Stephen Warren Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-tegra.c | 44 ++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 17ded1d2f11d..3c94c4a81a55 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -270,14 +270,30 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) /* Rounds down to not include partial word at the end of buf */ words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD; - if (words_to_transfer > tx_fifo_avail) - words_to_transfer = tx_fifo_avail; - i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + /* It's very common to have < 4 bytes, so optimize that case. */ + if (words_to_transfer) { + if (words_to_transfer > tx_fifo_avail) + words_to_transfer = tx_fifo_avail; - buf += words_to_transfer * BYTES_PER_FIFO_WORD; - buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; - tx_fifo_avail -= words_to_transfer; + /* + * Update state before writing to FIFO. If this casues us + * to finish writing all bytes (AKA buf_remaining goes to 0) we + * have a potential for an interrupt (PACKET_XFER_COMPLETE is + * not maskable). We need to make sure that the isr sees + * buf_remaining as 0 and doesn't call us back re-entrantly. + */ + buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; + tx_fifo_avail -= words_to_transfer; + i2c_dev->msg_buf_remaining = buf_remaining; + i2c_dev->msg_buf = buf + + words_to_transfer * BYTES_PER_FIFO_WORD; + barrier(); + + i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + + buf += words_to_transfer * BYTES_PER_FIFO_WORD; + } /* * If there is a partial word at the end of buf, handle it manually to @@ -287,14 +303,15 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) if (tx_fifo_avail > 0 && buf_remaining > 0) { BUG_ON(buf_remaining > 3); memcpy(&val, buf, buf_remaining); + + /* Again update before writing to FIFO to make sure isr sees. */ + i2c_dev->msg_buf_remaining = 0; + i2c_dev->msg_buf = NULL; + barrier(); + i2c_writel(i2c_dev, val, I2C_TX_FIFO); - buf_remaining = 0; - tx_fifo_avail--; } - BUG_ON(tx_fifo_avail > 0 && buf_remaining > 0); - i2c_dev->msg_buf_remaining = buf_remaining; - i2c_dev->msg_buf = buf; return 0; } @@ -411,9 +428,10 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) tegra_i2c_mask_irq(i2c_dev, I2C_INT_TX_FIFO_DATA_REQ); } - if ((status & I2C_INT_PACKET_XFER_COMPLETE) && - !i2c_dev->msg_buf_remaining) + if (status & I2C_INT_PACKET_XFER_COMPLETE) { + BUG_ON(i2c_dev->msg_buf_remaining); complete(&i2c_dev->msg_complete); + } i2c_writel(i2c_dev, status, I2C_INT_STATUS); if (i2c_dev->is_dvc) From bb9ea77846620ed2b37e74c852d72c7a476b248c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 6 Sep 2011 08:08:13 +0100 Subject: [PATCH 58/77] ARM: 7081/1: mach-integrator: fix the clocksource I was intrigued by the fact that the clock stood still on the Integrator, but it wasn't strange at all, because the timer was set up all wrong and probably has been for a while. With this patch the clock starts ticking again: make the timer periodic (reload), |= on the divisor bit and load the timer before starting it. Cc: stable@kernel.org Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-integrator/integrator_ap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 2fbbdd5eac35..fcf0ae95651f 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -337,15 +337,15 @@ static unsigned long timer_reload; static void integrator_clocksource_init(u32 khz) { void __iomem *base = (void __iomem *)TIMER2_VA_BASE; - u32 ctrl = TIMER_CTRL_ENABLE; + u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC; if (khz >= 1500) { khz /= 16; - ctrl = TIMER_CTRL_DIV16; + ctrl |= TIMER_CTRL_DIV16; } - writel(ctrl, base + TIMER_CTRL); writel(0xffff, base + TIMER_LOAD); + writel(ctrl, base + TIMER_CTRL); clocksource_mmio_init(base + TIMER_VALUE, "timer2", khz * 1000, 200, 16, clocksource_mmio_readl_down); From bac7e6ecf60933b68af910eb4c83a775a8b20b19 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 6 Sep 2011 07:45:46 +0100 Subject: [PATCH 59/77] ARM: 7080/1: l2x0: make sure I&D are not locked down on init Fighting unfixed U-Boots and other beasts that may the cache in a locked-down state when starting the kernel, we make sure to disable all cache lock-down when initializing the l2x0 so we are in a known state. Cc: Srinidhi Kasagar Cc: Rabin Vincent Cc: Adrian Bunk Cc: Rob Herring Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Santosh Shilimkar Reported-by: Jan Rinze Tested-by: Robert Marklund Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 9 +++++++-- arch/arm/mm/cache-l2x0.c | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index bfa706ffd968..99a6ed7e1bfd 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -45,8 +45,13 @@ #define L2X0_CLEAN_INV_LINE_PA 0x7F0 #define L2X0_CLEAN_INV_LINE_IDX 0x7F8 #define L2X0_CLEAN_INV_WAY 0x7FC -#define L2X0_LOCKDOWN_WAY_D 0x900 -#define L2X0_LOCKDOWN_WAY_I 0x904 +/* + * The lockdown registers repeat 8 times for L310, the L210 has only one + * D and one I lockdown register at 0x0900 and 0x0904. + */ +#define L2X0_LOCKDOWN_WAY_D_BASE 0x900 +#define L2X0_LOCKDOWN_WAY_I_BASE 0x904 +#define L2X0_LOCKDOWN_STRIDE 0x08 #define L2X0_TEST_OPERATION 0xF00 #define L2X0_LINE_DATA 0xF10 #define L2X0_LINE_TAG 0xF30 diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 44c086710d2b..9ecfdb511951 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -277,6 +277,25 @@ static void l2x0_disable(void) spin_unlock_irqrestore(&l2x0_lock, flags); } +static void __init l2x0_unlock(__u32 cache_id) +{ + int lockregs; + int i; + + if (cache_id == L2X0_CACHE_ID_PART_L310) + lockregs = 8; + else + /* L210 and unknown types */ + lockregs = 1; + + for (i = 0; i < lockregs; i++) { + writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } +} + void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) { __u32 aux; @@ -328,6 +347,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) * accessing the below registers will fault. */ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + /* Make sure that I&D is not locked down when starting */ + l2x0_unlock(cache_id); /* l2x0 controller is disabled */ writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); From dde58cfcc3b6dd2f160ffd355f76ae526155a4df Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 5 Sep 2011 18:45:28 +0200 Subject: [PATCH 60/77] HID: wacom: Fix error path of power-supply initialization power_supply_unregister() must not be called if power_supply_register() failed. The wdata->psy.dev pointer may point to invalid memory after a failed power_supply_register() and hence wacom_remove() will fail while calling power_supply_unregister(). This changes the wacom_probe function to fail if it cannot register the power_supply devices. If we would want to keep the previous behaviour we had to keep some flag about the power_supply state and check it on wacom_remove, but this seems inappropriate here. Hence, we simply fail, too, if power_supply_register fails. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wacom.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c index 06888323828c..f66a597cff63 100644 --- a/drivers/hid/hid-wacom.c +++ b/drivers/hid/hid-wacom.c @@ -353,11 +353,7 @@ static int wacom_probe(struct hid_device *hdev, if (ret) { hid_warn(hdev, "can't create sysfs battery attribute, err: %d\n", ret); - /* - * battery attribute is not critical for the tablet, but if it - * failed then there is no need to create ac attribute - */ - goto move_on; + goto err_battery; } wdata->ac.properties = wacom_ac_props; @@ -371,14 +367,8 @@ static int wacom_probe(struct hid_device *hdev, if (ret) { hid_warn(hdev, "can't create ac battery attribute, err: %d\n", ret); - /* - * ac attribute is not critical for the tablet, but if it - * failed then we don't want to battery attribute to exist - */ - power_supply_unregister(&wdata->battery); + goto err_ac; } - -move_on: #endif hidinput = list_entry(hdev->inputs.next, struct hid_input, list); input = hidinput->input; @@ -416,6 +406,13 @@ move_on: return 0; +#ifdef CONFIG_HID_WACOM_POWER_SUPPLY +err_ac: + power_supply_unregister(&wdata->battery); +err_battery: + device_remove_file(&hdev->dev, &dev_attr_speed); + hid_hw_stop(hdev); +#endif err_free: kfree(wdata); return ret; From 9086617ea3a7f3e574ca64392b827bdd56f607eb Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 5 Sep 2011 18:45:29 +0200 Subject: [PATCH 61/77] HID: wacom: Unregister sysfs attributes on remove HID devices can be hotplugged so we should unregister all sysfs attributes when removing a driver. Otherwise, manually unloading the wacom-driver will not remove the sysfs attributes. Only when the device is disconnected, they are removed, eventually. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wacom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c index f66a597cff63..a597039d0755 100644 --- a/drivers/hid/hid-wacom.c +++ b/drivers/hid/hid-wacom.c @@ -423,6 +423,7 @@ static void wacom_remove(struct hid_device *hdev) #ifdef CONFIG_HID_WACOM_POWER_SUPPLY struct wacom_data *wdata = hid_get_drvdata(hdev); #endif + device_remove_file(&hdev->dev, &dev_attr_speed); hid_hw_stop(hdev); #ifdef CONFIG_HID_WACOM_POWER_SUPPLY From d2f152878d457a84f3708acee5f682322386a79b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Sep 2011 10:16:50 -0700 Subject: [PATCH 62/77] wireless: fix kernel-doc warning in net/cfg80211.h Fix kernel-doc warning in net/cfg80211.h: Warning(include/net/cfg80211.h:1884): No description found for parameter 'registered' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/net/cfg80211.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 408ae4882d22..401d73bd151f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1744,6 +1744,8 @@ struct wiphy_wowlan_support { * by default for perm_addr. In this case, the mask should be set to * all-zeroes. In this case it is assumed that the device can handle * the same number of arbitrary MAC addresses. + * @registered: protects ->resume and ->suspend sysfs callbacks against + * unregister hardware * @debugfsdir: debugfs directory used for this wiphy, will be renamed * automatically on wiphy renames * @dev: (virtual) struct device for this wiphy From bff747c58cf97bf4fc8b499ee0f419b59d6b226d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Sep 2011 10:16:47 -0700 Subject: [PATCH 63/77] regulator: fix kernel-doc warning in consumer.h Fix kernel-doc warning about internal/private data by marking it as "private:" so that kernel-doc will ignore it. Warning(include/linux/regulator/consumer.h:128): No description found for parameter 'ret' Signed-off-by: Randy Dunlap Acked-by: Mark Brown Signed-off-by: Linus Torvalds --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 26f6ea4444e3..b47771aa5718 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -123,7 +123,7 @@ struct regulator_bulk_data { const char *supply; struct regulator *consumer; - /* Internal use */ + /* private: Internal use */ int ret; }; From 0ec26fd0698285b31248e34bf1abb022c00f23d6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 5 Sep 2011 18:06:26 +0200 Subject: [PATCH 64/77] vfs: automount should ignore LOOKUP_FOLLOW Prior to 2.6.38 automount would not trigger on either stat(2) or lstat(2) on the automount point. After 2.6.38, with the introduction of the ->d_automount() infrastructure, stat(2) and others would start triggering automount while lstat(2), etc. still would not. This is a regression and a userspace ABI change. Problem originally reported here: http://thread.gmane.org/gmane.linux.kernel.autofs/6098 It appears that there was an attempt at fixing various userspace tools to not trigger the automount. But since the stat system call is rather common it is impossible to "fix" all userspace. This patch reverts the original behavior, which is to not trigger on stat(2) and other symlink following syscalls. [ It's not really clear what the right behavior is. Apparently Solaris does the "automount on stat, leave alone on lstat". And some programs can get unhappy when "stat+open+fstat" ends up giving a different result from the fstat than from the initial stat. But the change in 2.6.38 resulted in problems for some people, so we're going back to old behavior. Maybe we can re-visit this discussion at some future date - Linus ] Reported-by: Leonardo Chiquitto Signed-off-by: Miklos Szeredi Acked-by: Ian Kent Cc: David Howells Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/namei.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 2826db35dc25..b52bc685465f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -727,25 +727,22 @@ static int follow_automount(struct path *path, unsigned flags, if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) return -EISDIR; /* we actually want to stop here */ - /* - * We don't want to mount if someone's just doing a stat and they've - * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and - * appended a '/' to the name. + /* We don't want to mount if someone's just doing a stat - + * unless they're stat'ing a directory and appended a '/' to + * the name. + * + * We do, however, want to mount if someone wants to open or + * create a file of any type under the mountpoint, wants to + * traverse through the mountpoint or wants to open the + * mounted directory. Also, autofs may mark negative dentries + * as being automount points. These will need the attentions + * of the daemon to instantiate them before they can be used. */ - if (!(flags & LOOKUP_FOLLOW)) { - /* We do, however, want to mount if someone wants to open or - * create a file of any type under the mountpoint, wants to - * traverse through the mountpoint or wants to open the mounted - * directory. - * Also, autofs may mark negative dentries as being automount - * points. These will need the attentions of the daemon to - * instantiate them before they can be used. - */ - if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE)) && - path->dentry->d_inode) - return -EISDIR; - } + if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE)) && + path->dentry->d_inode) + return -EISDIR; + current->total_link_count++; if (current->total_link_count >= 40) return -ELOOP; From 5307f6d5fb12fd01f9f321bc4a8fd77e74858647 Mon Sep 17 00:00:00 2001 From: Shyam Iyer Date: Thu, 8 Sep 2011 16:41:17 -0500 Subject: [PATCH 65/77] Fix pointer dereference before call to pcie_bus_configure_settings Commit b03e7495a862 ("PCI: Set PCI-E Max Payload Size on fabric") introduced a potential NULL pointer dereference in calls to pcie_bus_configure_settings due to attempts to access pci_bus self variables when the self pointer is NULL. To correct this, verify that the self pointer in pci_bus is non-NULL before dereferencing it. Reported-by: Stanislaw Gruszka Signed-off-by: Shyam Iyer Signed-off-by: Jon Mason Acked-by: Jesse Barnes Signed-off-by: Linus Torvalds --- arch/x86/pci/acpi.c | 9 +++++++-- drivers/pci/hotplug/pcihp_slot.c | 4 +++- drivers/pci/probe.c | 3 --- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index c95330267f08..039d91315bc5 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -365,8 +365,13 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) */ if (bus) { struct pci_bus *child; - list_for_each_entry(child, &bus->children, node) - pcie_bus_configure_settings(child, child->self->pcie_mpss); + list_for_each_entry(child, &bus->children, node) { + struct pci_dev *self = child->self; + if (!self) + continue; + + pcie_bus_configure_settings(child, self->pcie_mpss); + } } if (!bus) diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 753b21aaea61..3ffd9c1acc0a 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -169,7 +169,9 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; - pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss); + if (dev->bus && dev->bus->self) + pcie_bus_configure_settings(dev->bus, + dev->bus->self->pcie_mpss); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8473727b29fa..0820fc1544e8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1456,9 +1456,6 @@ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) { u8 smpss = mpss; - if (!bus->self) - return; - if (!pci_is_pcie(bus->self)) return; From ed2888e906b56769b4ffabb9c577190438aa68b8 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Thu, 8 Sep 2011 16:41:18 -0500 Subject: [PATCH 66/77] PCI: Remove MRRS modification from MPS setting code Modifying the Maximum Read Request Size to 0 (value of 128Bytes) has massive negative ramifications on some devices. Without knowing which devices have this issue, do not modify from the default value when walking the PCI-E bus in pcie_bus_safe mode. Also, make pcie_bus_safe the default procedure. Tested-by: Sven Schnelle Tested-by: Simon Kirby Tested-by: Stephen M. Cameron Reported-and-tested-by: Eric Dumazet Reported-and-tested-by: Niels Ole Salscheider References: https://bugzilla.kernel.org/show_bug.cgi?id=42162 Signed-off-by: Jon Mason Acked-by: Jesse Barnes Signed-off-by: Linus Torvalds --- drivers/pci/pci.c | 2 +- drivers/pci/probe.c | 41 ++++++++++++++++++++++------------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0ce67423a0a3..4e84fd4a4312 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; -enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE; +enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE; /* * The default CLS is used if arch didn't set CLS explicitly and not diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0820fc1544e8..b1187ff31d89 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1396,34 +1396,37 @@ static void pcie_write_mps(struct pci_dev *dev, int mps) static void pcie_write_mrrs(struct pci_dev *dev, int mps) { - int rc, mrrs; + int rc, mrrs, dev_mpss; - if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { - int dev_mpss = 128 << dev->pcie_mpss; + /* In the "safe" case, do not configure the MRRS. There appear to be + * issues with setting MRRS to 0 on a number of devices. + */ - /* For Max performance, the MRRS must be set to the largest - * supported value. However, it cannot be configured larger - * than the MPS the device or the bus can support. This assumes - * that the largest MRRS available on the device cannot be - * smaller than the device MPSS. - */ - mrrs = mps < dev_mpss ? mps : dev_mpss; - } else - /* In the "safe" case, configure the MRRS for fairness on the - * bus by making all devices have the same size - */ - mrrs = mps; + if (pcie_bus_config != PCIE_BUS_PERFORMANCE) + return; + dev_mpss = 128 << dev->pcie_mpss; + + /* For Max performance, the MRRS must be set to the largest supported + * value. However, it cannot be configured larger than the MPS the + * device or the bus can support. This assumes that the largest MRRS + * available on the device cannot be smaller than the device MPSS. + */ + mrrs = min(mps, dev_mpss); /* MRRS is a R/W register. Invalid values can be written, but a - * subsiquent read will verify if the value is acceptable or not. + * subsequent read will verify if the value is acceptable or not. * If the MRRS value provided is not acceptable (e.g., too large), * shrink the value until it is acceptable to the HW. */ while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { + dev_warn(&dev->dev, "Attempting to modify the PCI-E MRRS value" + " to %d. If any issues are encountered, please try " + "running with pci=pcie_bus_safe\n", mrrs); rc = pcie_set_readrq(dev, mrrs); if (rc) - dev_err(&dev->dev, "Failed attempting to set the MRRS\n"); + dev_err(&dev->dev, + "Failed attempting to set the MRRS\n"); mrrs /= 2; } @@ -1436,13 +1439,13 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) if (!pci_is_pcie(dev)) return 0; - dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); pcie_write_mps(dev, mps); pcie_write_mrrs(dev, mps); - dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); return 0; From 94007751bb02797ba87bac7aacee2731ac2039a3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 10 Sep 2011 17:20:21 +1000 Subject: [PATCH 67/77] Avoid dereferencing a 'request_queue' after last close. On the last close of an 'md' device which as been stopped, the device is destroyed and in particular the request_queue is freed. The free is done in a separate thread so it might happen a short time later. __blkdev_put calls bdev_inode_switch_bdi *after* ->release has been called. Since commit f758eeabeb96f878c860e8f110f94ec8820822a9 bdev_inode_switch_bdi will dereference the 'old' bdi, which lives inside a request_queue, to get a spin lock. This causes the last close on an md device to sometime take a spin_lock which lives in freed memory - which results in an oops. So move the called to bdev_inode_switch_bdi before the call to ->release. Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Andrew Morton Cc: Wu Fengguang Acked-by: Wu Fengguang Cc: stable@kernel.org Signed-off-by: NeilBrown --- fs/block_dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index ff77262e887c..95f786ec7f08 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1429,6 +1429,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); + /* ->release can cause the old bdi to disappear, + * so must switch it out first + */ + bdev_inode_switch_bdi(bdev->bd_inode, + &default_backing_dev_info); } if (bdev->bd_contains == bdev) { if (disk->fops->release) @@ -1442,8 +1447,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; - bdev_inode_switch_bdi(bdev->bd_inode, - &default_backing_dev_info); if (bdev != bdev->bd_contains) victim = bdev->bd_contains; bdev->bd_contains = NULL; From 19d5f834d6aff7efb1c9353523865c5bce869470 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 10 Sep 2011 17:21:17 +1000 Subject: [PATCH 68/77] md/raid10: unify handling of write completion. A write can complete at two different places: 1/ when the last member-device write completes, through raid10_end_write_request 2/ in make_request() when we remove the initial bias from ->remaining. These two should do exactly the same thing and the comment says they do, but they don't. So factor the correct code out into a function and call it in both places. This makes the code much more similar to RAID1. The difference is only significant if there is an error, and they usually take a while, so it is unlikely that there will be an error already when make_request is completing, so this is unlikely to cause real problems. Signed-off-by: NeilBrown --- drivers/md/raid10.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8b29cd4f01c8..f6873fc8e5ee 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -337,6 +337,21 @@ static void close_write(r10bio_t *r10_bio) md_write_end(r10_bio->mddev); } +static void one_write_done(r10bio_t *r10_bio) +{ + if (atomic_dec_and_test(&r10_bio->remaining)) { + if (test_bit(R10BIO_WriteError, &r10_bio->state)) + reschedule_retry(r10_bio); + else { + close_write(r10_bio); + if (test_bit(R10BIO_MadeGood, &r10_bio->state)) + reschedule_retry(r10_bio); + else + raid_end_bio_io(r10_bio); + } + } +} + static void raid10_end_write_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -387,17 +402,7 @@ static void raid10_end_write_request(struct bio *bio, int error) * Let's see if all mirrored write operations have finished * already. */ - if (atomic_dec_and_test(&r10_bio->remaining)) { - if (test_bit(R10BIO_WriteError, &r10_bio->state)) - reschedule_retry(r10_bio); - else { - close_write(r10_bio); - if (test_bit(R10BIO_MadeGood, &r10_bio->state)) - reschedule_retry(r10_bio); - else - raid_end_bio_io(r10_bio); - } - } + one_write_done(r10_bio); if (dec_rdev) rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); } @@ -1127,15 +1132,8 @@ retry_write: spin_unlock_irqrestore(&conf->device_lock, flags); } - if (atomic_dec_and_test(&r10_bio->remaining)) { - /* This matches the end of raid10_end_write_request() */ - bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, - r10_bio->sectors, - !test_bit(R10BIO_Degraded, &r10_bio->state), - 0); - md_write_end(mddev); - raid_end_bio_io(r10_bio); - } + /* Remove the bias on 'remaining' */ + one_write_done(r10_bio); /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); From 079fa166a2874985ae58b2e21e26e1cbc91127d4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 10 Sep 2011 17:21:23 +1000 Subject: [PATCH 69/77] md/raid1,10: Remove use-after-free bug in make_request. A single request to RAID1 or RAID10 might result in multiple requests if there are known bad blocks that need to be avoided. To detect if we need to submit another write request we test: if (sectors_handled < (bio->bi_size >> 9)) { However this is after we call **_write_done() so the 'bio' no longer belongs to us - the writes could have completed and the bio freed. So move the **_write_done call until after the test against bio->bi_size. This addresses https://bugzilla.kernel.org/show_bug.cgi?id=41862 Reported-by: Bruno Wolff III Tested-by: Bruno Wolff III Signed-off-by: NeilBrown --- drivers/md/raid1.c | 14 +++++++++----- drivers/md/raid10.c | 13 ++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 32323f0afd89..f4622dd8fc59 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1099,12 +1099,11 @@ read_again: bio_list_add(&conf->pending_bio_list, mbio); spin_unlock_irqrestore(&conf->device_lock, flags); } - r1_bio_write_done(r1_bio); - - /* In case raid1d snuck in to freeze_array */ - wake_up(&conf->wait_barrier); - + /* Mustn't call r1_bio_write_done before this next test, + * as it could result in the bio being freed. + */ if (sectors_handled < (bio->bi_size >> 9)) { + r1_bio_write_done(r1_bio); /* We need another r1_bio. It has already been counted * in bio->bi_phys_segments */ @@ -1117,6 +1116,11 @@ read_again: goto retry_write; } + r1_bio_write_done(r1_bio); + + /* In case raid1d snuck in to freeze_array */ + wake_up(&conf->wait_barrier); + if (do_sync || !bitmap || !plugged) md_wakeup_thread(mddev->thread); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f6873fc8e5ee..d7a8468ddeab 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1132,13 +1132,12 @@ retry_write: spin_unlock_irqrestore(&conf->device_lock, flags); } - /* Remove the bias on 'remaining' */ - one_write_done(r10_bio); - - /* In case raid10d snuck in to freeze_array */ - wake_up(&conf->wait_barrier); + /* Don't remove the bias on 'remaining' (one_write_done) until + * after checking if we need to go around again. + */ if (sectors_handled < (bio->bi_size >> 9)) { + one_write_done(r10_bio); /* We need another r10_bio. It has already been counted * in bio->bi_phys_segments. */ @@ -1152,6 +1151,10 @@ retry_write: r10_bio->state = 0; goto retry_write; } + one_write_done(r10_bio); + + /* In case raid10d snuck in to freeze_array */ + wake_up(&conf->wait_barrier); if (do_sync || !mddev->bitmap || !plugged) md_wakeup_thread(mddev->thread); From 27a7b260f71439c40546b43588448faac01adb93 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 10 Sep 2011 17:21:28 +1000 Subject: [PATCH 70/77] md: Fix handling for devices from 2TB to 4TB in 0.90 metadata. 0.90 metadata uses an unsigned 32bit number to count the number of kilobytes used from each device. This should allow up to 4TB per device. However we multiply this by 2 (to get sectors) before casting to a larger type, so sizes above 2TB get truncated. Also we allow rdev->sectors to be larger than 4TB, so it is possible for the array to be resized larger than the metadata can handle. So make sure rdev->sectors never exceeds 4TB when 0.90 metadata is in used. Also the sanity check at the end of super_90_load should include level 1 as it used ->size too. (RAID0 and Linear don't use ->size at all). Reported-by: Pim Zandbergen Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3742ce8b0acf..5404b2295820 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1138,8 +1138,11 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version ret = 0; } rdev->sectors = rdev->sb_start; + /* Limit to 4TB as metadata cannot record more than that */ + if (rdev->sectors >= (2ULL << 32)) + rdev->sectors = (2ULL << 32) - 2; - if (rdev->sectors < sb->size * 2 && sb->level > 1) + if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) /* "this cannot possibly happen" ... */ ret = -EINVAL; @@ -1173,7 +1176,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->clevel[0] = 0; mddev->layout = sb->layout; mddev->raid_disks = sb->raid_disks; - mddev->dev_sectors = sb->size * 2; + mddev->dev_sectors = ((sector_t)sb->size) * 2; mddev->events = ev1; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; @@ -1415,6 +1418,11 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) rdev->sb_start = calc_dev_sboffset(rdev); if (!num_sectors || num_sectors > rdev->sb_start) num_sectors = rdev->sb_start; + /* Limit to 4TB as metadata cannot record more than that. + * 4TB == 2^32 KB, or 2*2^32 sectors. + */ + if (num_sectors >= (2ULL << 32)) + num_sectors = (2ULL << 32) - 2; md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); md_super_wait(rdev->mddev); From c338bfb5ecf6c36b2112479691d69db4c2b5a78a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 10 Sep 2011 20:13:01 +0200 Subject: [PATCH 71/77] backlight: Declare backlight_types[] const Since backlight_types[] isn't modified, let's declare it const. That was probably the intention of the author of commit bb7ca747f8d6 ("backlight: add backlight type"), via which the "const char const *" construct was introduced. The duplicate const was detected by sparse. Signed-off-by: Bart Van Assche Cc: Matthew Garrett Cc: Richard Purdie Cc: Florian Tobias Schandinat Cc: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/backlight.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 80d292fb92d8..7363c1b169e8 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -19,7 +19,7 @@ #include #endif -static const char const *backlight_types[] = { +static const char *const backlight_types[] = { [BACKLIGHT_RAW] = "raw", [BACKLIGHT_PLATFORM] = "platform", [BACKLIGHT_FIRMWARE] = "firmware", From 6c6d8deb5d95a0675a8edd588bbc2249cbce5b34 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 8 Sep 2011 18:45:40 +0100 Subject: [PATCH 72/77] ARM: 7088/1: entry: fix wrong parameter name used in do_thumb_abort Commit be020f8618ca, "ARM: entry: abort-macro: specify registers to be used for macros", while replacing register numbers with macro parameter names, mismatched the name used for r1. For me, this resulted in user space built for EABI with -march=armv4t -mtune=arm920t -mthumb-interwork -mthumb broken on my OMAP1510 based Amstrad Delta (old ABI and no thumb still worked for me though). Fix this by using correct parameter name fsr instead of mismatched psr, used by callers for another purpose. Signed-off-by: Janusz Krzysztofik Signed-off-by: Russell King --- arch/arm/mm/abort-macro.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S index 52162d59407a..2cbf68ef0e83 100644 --- a/arch/arm/mm/abort-macro.S +++ b/arch/arm/mm/abort-macro.S @@ -17,7 +17,7 @@ cmp \tmp, # 0x5600 @ Is it ldrsb? orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes tst \tmp, #1 << 11 @ L = 0 -> write - orreq \psr, \psr, #1 << 11 @ yes. + orreq \fsr, \fsr, #1 << 11 @ yes. b do_DataAbort not_thumb: .endm From 8c369264b6de3b2ab796f330a4d85770a6b8b033 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 3 Aug 2011 18:12:05 +0100 Subject: [PATCH 73/77] ARM: 7009/1: l2x0: Add OF based initialization This adds probing for ARM L2x0 cache controllers via device tree. Support includes the L210, L220, and PL310 controllers. The binding allows setting up cache RAM latencies and filter addresses (PL310 only). Signed-off-by: Rob Herring Acked-by: Grant Likely Acked-by: Arnd Bergmann Acked-by: Olof Johansson Acked-by: Barry Song <21cnbao@gmail.com> Signed-off-by: Russell King --- .../devicetree/bindings/arm/l2cc.txt | 42 +++++++ arch/arm/include/asm/hardware/cache-l2x0.h | 17 +++ arch/arm/mm/cache-l2x0.c | 103 ++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/l2cc.txt diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt new file mode 100644 index 000000000000..f50e021a0998 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/l2cc.txt @@ -0,0 +1,42 @@ +* ARM L2 Cache Controller + +ARM cores often have a separate level 2 cache controller. There are various +implementations of the L2 cache controller with compatible programming models. +The ARM L2 cache representation in the device tree should be done as follows: + +Required properties: + +- compatible : should be one of: + "arm,pl310-cache" + "arm,l220-cache" + "arm,l210-cache" +- cache-unified : Specifies the cache is a unified cache. +- cache-level : Should be set to 2 for a level 2 cache. +- reg : Physical base address and size of cache controller's memory mapped + registers. + +Optional properties: + +- arm,data-latency : Cycles of latency for Data RAM accesses. Specifies 3 cells of + read, write and setup latencies. Minimum valid values are 1. Controllers + without setup latency control should use a value of 0. +- arm,tag-latency : Cycles of latency for Tag RAM accesses. Specifies 3 cells of + read, write and setup latencies. Controllers without setup latency control + should use 0. Controllers without separate read and write Tag RAM latency + values should only use the first cell. +- arm,dirty-latency : Cycles of latency for Dirty RAMs. This is a single cell. +- arm,filter-ranges : Starting address and length of window to + filter. Addresses in the filter window are directed to the M1 port. Other + addresses will go to the M0 port. + +Example: + +L2: cache-controller { + compatible = "arm,pl310-cache"; + reg = <0xfff12000 0x1000>; + arm,data-latency = <1 1 1>; + arm,tag-latency = <2 2 2>; + arm,filter-latency = <0x80000000 0x8000000>; + cache-unified; + cache-level = <2>; +}; diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 99a6ed7e1bfd..c48cb1e1c46c 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -52,6 +52,8 @@ #define L2X0_LOCKDOWN_WAY_D_BASE 0x900 #define L2X0_LOCKDOWN_WAY_I_BASE 0x904 #define L2X0_LOCKDOWN_STRIDE 0x08 +#define L2X0_ADDR_FILTER_START 0xC00 +#define L2X0_ADDR_FILTER_END 0xC04 #define L2X0_TEST_OPERATION 0xF00 #define L2X0_LINE_DATA 0xF10 #define L2X0_LINE_TAG 0xF30 @@ -67,6 +69,14 @@ #define L2X0_CACHE_ID_PART_L310 (3 << 6) #define L2X0_AUX_CTRL_MASK 0xc0000fff +#define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT 0 +#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK 0x7 +#define L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT 3 +#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK (0x7 << 3) +#define L2X0_AUX_CTRL_TAG_LATENCY_SHIFT 6 +#define L2X0_AUX_CTRL_TAG_LATENCY_MASK (0x7 << 6) +#define L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT 9 +#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK (0x7 << 9) #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT 16 #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT 17 #define L2X0_AUX_CTRL_WAY_SIZE_MASK (0x7 << 17) @@ -77,8 +87,15 @@ #define L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT 29 #define L2X0_AUX_CTRL_EARLY_BRESP_SHIFT 30 +#define L2X0_LATENCY_CTRL_SETUP_SHIFT 0 +#define L2X0_LATENCY_CTRL_RD_SHIFT 4 +#define L2X0_LATENCY_CTRL_WR_SHIFT 8 + +#define L2X0_ADDR_FILTER_EN 1 + #ifndef __ASSEMBLY__ extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask); +extern int l2x0_of_init(__u32 aux_val, __u32 aux_mask); #endif #endif diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 9ecfdb511951..db4484f5bf98 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -16,9 +16,12 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include +#include +#include #include #include @@ -372,3 +375,103 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n", ways, cache_id, aux, l2x0_size); } + +#ifdef CONFIG_OF +static void __init l2x0_of_setup(const struct device_node *np, + __u32 *aux_val, __u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static void __init pl310_of_setup(const struct device_node *np, + __u32 *aux_val, __u32 *aux_mask) +{ + u32 data[3] = { 0, 0, 0 }; + u32 tag[3] = { 0, 0, 0 }; + u32 filter[2] = { 0, 0 }; + + of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); + if (tag[0] && tag[1] && tag[2]) + writel_relaxed( + ((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | + ((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | + ((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), + l2x0_base + L2X0_TAG_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1] && data[2]) + writel_relaxed( + ((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | + ((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | + ((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), + l2x0_base + L2X0_DATA_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,filter-ranges", + filter, ARRAY_SIZE(filter)); + if (filter[0] && filter[1]) { + writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), + l2x0_base + L2X0_ADDR_FILTER_END); + writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN, + l2x0_base + L2X0_ADDR_FILTER_START); + } +} + +static const struct of_device_id l2x0_ids[] __initconst = { + { .compatible = "arm,pl310-cache", .data = pl310_of_setup }, + { .compatible = "arm,l220-cache", .data = l2x0_of_setup }, + { .compatible = "arm,l210-cache", .data = l2x0_of_setup }, + {} +}; + +int __init l2x0_of_init(__u32 aux_val, __u32 aux_mask) +{ + struct device_node *np; + void (*l2_setup)(const struct device_node *np, + __u32 *aux_val, __u32 *aux_mask); + + np = of_find_matching_node(NULL, l2x0_ids); + if (!np) + return -ENODEV; + l2x0_base = of_iomap(np, 0); + if (!l2x0_base) + return -ENOMEM; + + /* L2 configuration can only be changed if the cache is disabled */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + l2_setup = of_match_node(l2x0_ids, np)->data; + if (l2_setup) + l2_setup(np, &aux_val, &aux_mask); + } + l2x0_init(l2x0_base, aux_val, aux_mask); + return 0; +} +#endif From 1caf30924f71ae16a26aa59b02a6401f467bf1c8 Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Fri, 9 Sep 2011 10:30:34 +0100 Subject: [PATCH 74/77] ARM: 7089/1: L2X0: add explicit cpu_relax() for busy wait loop using cpu_relax in busy loops is a well-known idiom in the kernel. It's more for documentation purposes than technically needed here. Signed-off-by: Barry Song Acked-by: Arnd Bergmann Reviewed-by: Jamie Iles Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index db4484f5bf98..a78044885ed5 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -37,7 +37,7 @@ static inline void cache_wait_way(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ while (readl_relaxed(reg) & mask) - ; + cpu_relax(); } #ifdef CONFIG_CACHE_PL310 From 74d41f39a9c161cd0434bb13d929d75fc7be75bd Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Wed, 14 Sep 2011 03:20:01 +0100 Subject: [PATCH 75/77] ARM: 7090/1: CACHE-L2X0: filter start address can be 0 and is often 0 this patch fixes the error in Rob Herring's ARM: 7009/1: l2x0: Add OF based initialization http://www.spinics.net/lists/arm-kernel/msg131123.html it has been in rmk/for-next with commit 41c86ff5b Cc: Shawn Guo Cc: Arnd Bergmann Signed-off-by: Barry Song Acked-by: Rob Herring Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index a78044885ed5..0d85d221d7b0 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -437,7 +437,7 @@ static void __init pl310_of_setup(const struct device_node *np, of_property_read_u32_array(np, "arm,filter-ranges", filter, ARRAY_SIZE(filter)); - if (filter[0] && filter[1]) { + if (filter[1]) { writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), l2x0_base + L2X0_ADDR_FILTER_END); writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN, From 8d4e652d1b2539196efaef051956fa29e22e9c10 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Aug 2011 18:03:17 +0100 Subject: [PATCH 76/77] ARM: 7023/1: L2x0: Add interrupts property to OF binding Following the discussion here: http://lists.ozlabs.org/pipermail/devicetree-discuss/2011-August/007301.html The L2x0 L2 Cache Controllers support a combined interrupt line which can be used for several events (e.g. read/write/parity errors on tag/data RAM, event counter increment/overflow). Unfortunately the OF binding added in c519ecf2 ("ARM: 7009/1: l2x0: Add OF based initialization") does not represent the interrupt. This patch adds an "interrupts" property to the L2x0 OF binding, representing the combined interrupt line. Signed-off-by: Mark Rutland Acked-by: Rob Herring Acked-by: Will Deacon Cc: Grant Likely Cc: Arnd Bergmann Cc: Olof Johansson Cc: Barry Song <21cnbao@gmail.com> Signed-off-by: Russell King --- Documentation/devicetree/bindings/arm/l2cc.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt index f50e021a0998..7ca52161e7ab 100644 --- a/Documentation/devicetree/bindings/arm/l2cc.txt +++ b/Documentation/devicetree/bindings/arm/l2cc.txt @@ -28,6 +28,7 @@ Optional properties: - arm,filter-ranges : Starting address and length of window to filter. Addresses in the filter window are directed to the M1 port. Other addresses will go to the M0 port. +- interrupts : 1 combined interrupt. Example: @@ -39,4 +40,5 @@ L2: cache-controller { arm,filter-latency = <0x80000000 0x8000000>; cache-unified; cache-level = <2>; + interrupts = <45>; }; From 91c2ebb90b1890abc648ba9dec5608cbc97e1cb9 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 30 Sep 2011 14:43:12 +0100 Subject: [PATCH 77/77] ARM: 7114/1: cache-l2x0: add resume entry for l2 in secure mode we save the l2x0 registers at the first initialization, and platform codes can get them to restore l2x0 status after wakeup. Cc: Lorenzo Pieralisi Signed-off-by: Barry Song Reviewed-by: Santosh Shilimkar Tested-by: Shawn Guo Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 25 ++++ arch/arm/include/asm/outercache.h | 7 ++ arch/arm/kernel/asm-offsets.c | 12 ++ arch/arm/mm/cache-l2x0.c | 129 +++++++++++++++++++-- 4 files changed, 163 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index c48cb1e1c46c..434edccdf7f3 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -67,6 +67,13 @@ #define L2X0_CACHE_ID_PART_MASK (0xf << 6) #define L2X0_CACHE_ID_PART_L210 (1 << 6) #define L2X0_CACHE_ID_PART_L310 (3 << 6) +#define L2X0_CACHE_ID_RTL_MASK 0x3f +#define L2X0_CACHE_ID_RTL_R0P0 0x0 +#define L2X0_CACHE_ID_RTL_R1P0 0x2 +#define L2X0_CACHE_ID_RTL_R2P0 0x4 +#define L2X0_CACHE_ID_RTL_R3P0 0x5 +#define L2X0_CACHE_ID_RTL_R3P1 0x6 +#define L2X0_CACHE_ID_RTL_R3P2 0x8 #define L2X0_AUX_CTRL_MASK 0xc0000fff #define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT 0 @@ -96,6 +103,24 @@ #ifndef __ASSEMBLY__ extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask); extern int l2x0_of_init(__u32 aux_val, __u32 aux_mask); + +struct l2x0_regs { + unsigned long phy_base; + unsigned long aux_ctrl; + /* + * Whether the following registers need to be saved/restored + * depends on platform + */ + unsigned long tag_latency; + unsigned long data_latency; + unsigned long filter_start; + unsigned long filter_end; + unsigned long prefetch_ctrl; + unsigned long pwr_ctrl; +}; + +extern struct l2x0_regs l2x0_saved_regs; + #endif #endif diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h index d8387437ec5a..53426c66352a 100644 --- a/arch/arm/include/asm/outercache.h +++ b/arch/arm/include/asm/outercache.h @@ -34,6 +34,7 @@ struct outer_cache_fns { void (*sync)(void); #endif void (*set_debug)(unsigned long); + void (*resume)(void); }; #ifdef CONFIG_OUTER_CACHE @@ -74,6 +75,12 @@ static inline void outer_disable(void) outer_cache.disable(); } +static inline void outer_resume(void) +{ + if (outer_cache.resume) + outer_cache.resume(); +} + #else static inline void outer_inv_range(phys_addr_t start, phys_addr_t end) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 16baba2e4369..1429d8989fb9 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* @@ -92,6 +93,17 @@ int main(void) DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); +#ifdef CONFIG_CACHE_L2X0 + DEFINE(L2X0_R_PHY_BASE, offsetof(struct l2x0_regs, phy_base)); + DEFINE(L2X0_R_AUX_CTRL, offsetof(struct l2x0_regs, aux_ctrl)); + DEFINE(L2X0_R_TAG_LATENCY, offsetof(struct l2x0_regs, tag_latency)); + DEFINE(L2X0_R_DATA_LATENCY, offsetof(struct l2x0_regs, data_latency)); + DEFINE(L2X0_R_FILTER_START, offsetof(struct l2x0_regs, filter_start)); + DEFINE(L2X0_R_FILTER_END, offsetof(struct l2x0_regs, filter_end)); + DEFINE(L2X0_R_PREFETCH_CTRL, offsetof(struct l2x0_regs, prefetch_ctrl)); + DEFINE(L2X0_R_PWR_CTRL, offsetof(struct l2x0_regs, pwr_ctrl)); + BLANK(); +#endif #ifdef CONFIG_CPU_HAS_ASID DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); BLANK(); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 0d85d221d7b0..3f9b9980478e 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -33,6 +33,14 @@ static DEFINE_SPINLOCK(l2x0_lock); static uint32_t l2x0_way_mask; /* Bitmask of active ways */ static uint32_t l2x0_size; +struct l2x0_regs l2x0_saved_regs; + +struct l2x0_of_data { + void (*setup)(const struct device_node *, __u32 *, __u32 *); + void (*save)(void); + void (*resume)(void); +}; + static inline void cache_wait_way(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ @@ -280,7 +288,7 @@ static void l2x0_disable(void) spin_unlock_irqrestore(&l2x0_lock, flags); } -static void __init l2x0_unlock(__u32 cache_id) +static void l2x0_unlock(__u32 cache_id) { int lockregs; int i; @@ -356,6 +364,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) /* l2x0 controller is disabled */ writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); + l2x0_saved_regs.aux_ctrl = aux; + l2x0_inv_all(); /* enable L2X0 */ @@ -445,33 +455,132 @@ static void __init pl310_of_setup(const struct device_node *np, } } +static void __init pl310_save(void) +{ + u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base + + L2X0_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base + + L2X0_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base + + L2X0_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base + + L2X0_ADDR_FILTER_START); + + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { + /* + * From r2p0, there is Prefetch offset/control register + */ + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base + + L2X0_PREFETCH_CTRL); + /* + * From r3p0, there is Power control register + */ + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base + + L2X0_POWER_CTRL); + } +} + +static void l2x0_resume(void) +{ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + /* restore aux ctrl and enable l2 */ + l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); + + writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base + + L2X0_AUX_CTRL); + + l2x0_inv_all(); + + writel_relaxed(1, l2x0_base + L2X0_CTRL); + } +} + +static void pl310_resume(void) +{ + u32 l2x0_revision; + + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + /* restore pl310 setup */ + writel_relaxed(l2x0_saved_regs.tag_latency, + l2x0_base + L2X0_TAG_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.data_latency, + l2x0_base + L2X0_DATA_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.filter_end, + l2x0_base + L2X0_ADDR_FILTER_END); + writel_relaxed(l2x0_saved_regs.filter_start, + l2x0_base + L2X0_ADDR_FILTER_START); + + l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + l2x0_base + L2X0_PREFETCH_CTRL); + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) + writel_relaxed(l2x0_saved_regs.pwr_ctrl, + l2x0_base + L2X0_POWER_CTRL); + } + } + + l2x0_resume(); +} + +static const struct l2x0_of_data pl310_data = { + pl310_of_setup, + pl310_save, + pl310_resume, +}; + +static const struct l2x0_of_data l2x0_data = { + l2x0_of_setup, + NULL, + l2x0_resume, +}; + static const struct of_device_id l2x0_ids[] __initconst = { - { .compatible = "arm,pl310-cache", .data = pl310_of_setup }, - { .compatible = "arm,l220-cache", .data = l2x0_of_setup }, - { .compatible = "arm,l210-cache", .data = l2x0_of_setup }, + { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, + { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, + { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, {} }; int __init l2x0_of_init(__u32 aux_val, __u32 aux_mask) { struct device_node *np; - void (*l2_setup)(const struct device_node *np, - __u32 *aux_val, __u32 *aux_mask); + struct l2x0_of_data *data; + struct resource res; np = of_find_matching_node(NULL, l2x0_ids); if (!np) return -ENODEV; - l2x0_base = of_iomap(np, 0); + + if (of_address_to_resource(np, 0, &res)) + return -ENODEV; + + l2x0_base = ioremap(res.start, resource_size(&res)); if (!l2x0_base) return -ENOMEM; + l2x0_saved_regs.phy_base = res.start; + + data = of_match_node(l2x0_ids, np)->data; + /* L2 configuration can only be changed if the cache is disabled */ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { - l2_setup = of_match_node(l2x0_ids, np)->data; - if (l2_setup) - l2_setup(np, &aux_val, &aux_mask); + if (data->setup) + data->setup(np, &aux_val, &aux_mask); } + + if (data->save) + data->save(); + l2x0_init(l2x0_base, aux_val, aux_mask); + + outer_cache.resume = data->resume; return 0; } #endif