From a8e23a291852cd7c4fb5ca696dbb93912185ad10 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Oct 2010 15:32:57 -0700 Subject: [PATCH 001/139] mm,x86: fix kmap_atomic_push vs ioremap_32.c It appears i386 uses kmap_atomic infrastructure regardless of CONFIG_HIGHMEM which results in a compile error when highmem is disabled. Cure this by providing the needed few bits for both CONFIG_HIGHMEM and CONFIG_X86_32. Signed-off-by: Peter Zijlstra Reported-by: Chris Wilson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 46 ++++++++++++++++++++++------------------- mm/highmem.c | 6 +++++- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8a85ec109a3a..102f76be90da 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -37,27 +37,6 @@ extern unsigned long totalhigh_pages; void kmap_flush_unused(void); -DECLARE_PER_CPU(int, __kmap_atomic_idx); - -static inline int kmap_atomic_idx_push(void) -{ - int idx = __get_cpu_var(__kmap_atomic_idx)++; -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx > KM_TYPE_NR); -#endif - return idx; -} - -static inline int kmap_atomic_idx_pop(void) -{ - int idx = --__get_cpu_var(__kmap_atomic_idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(idx < 0); -#endif - return idx; -} - #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -95,6 +74,31 @@ static inline void __kunmap_atomic(void *addr) #endif /* CONFIG_HIGHMEM */ +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) + +DECLARE_PER_CPU(int, __kmap_atomic_idx); + +static inline int kmap_atomic_idx_push(void) +{ + int idx = __get_cpu_var(__kmap_atomic_idx)++; +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(in_irq() && !irqs_disabled()); + BUG_ON(idx > KM_TYPE_NR); +#endif + return idx; +} + +static inline int kmap_atomic_idx_pop(void) +{ + int idx = --__get_cpu_var(__kmap_atomic_idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(idx < 0); +#endif + return idx; +} + +#endif + /* * Make both: kmap_atomic(page, idx) and kmap_atomic(page) work. */ diff --git a/mm/highmem.c b/mm/highmem.c index 781e754a75ac..693394daa2ed 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -29,6 +29,11 @@ #include #include + +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) +DEFINE_PER_CPU(int, __kmap_atomic_idx); +#endif + /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped @@ -43,7 +48,6 @@ unsigned long totalhigh_pages __read_mostly; EXPORT_SYMBOL(totalhigh_pages); -DEFINE_PER_CPU(int, __kmap_atomic_idx); EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); unsigned int nr_free_highpages (void) From 20273941f2129aa5a432796d98a276ed73d60782 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Oct 2010 15:32:58 -0700 Subject: [PATCH 002/139] mm: fix race in kunmap_atomic() Christoph reported a nice splat which illustrated a race in the new stack based kmap_atomic implementation. The problem is that we pop our stack slot before we're completely done resetting its state -- in particular clearing the PTE (sometimes that's CONFIG_DEBUG_HIGHMEM). If an interrupt happens before we actually clear the PTE used for the last slot, that interrupt can reuse the slot in a dirty state, which triggers a BUG in kmap_atomic(). Fix this by introducing kmap_atomic_idx() which reports the current slot index without actually releasing it and use that to find the PTE and delay the _pop() until after we're completely done. Signed-off-by: Peter Zijlstra Reported-by: Christoph Hellwig Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/highmem.c | 3 ++- arch/frv/mm/highmem.c | 3 ++- arch/mips/mm/highmem.c | 3 ++- arch/mn10300/include/asm/highmem.h | 4 +++- arch/powerpc/mm/highmem.c | 4 +++- arch/sparc/mm/highmem.c | 4 +++- arch/tile/mm/highmem.c | 3 ++- arch/x86/mm/highmem_32.c | 3 ++- arch/x86/mm/iomap_32.c | 3 ++- include/linux/highmem.h | 5 +++++ 10 files changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index c00f119babbf..c435fd9e1da9 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -89,7 +89,7 @@ void __kunmap_atomic(void *kvaddr) int idx, type; if (kvaddr >= (void *)FIXADDR_START) { - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); if (cache_is_vivt()) @@ -101,6 +101,7 @@ void __kunmap_atomic(void *kvaddr) #else (void) idx; /* to kill a warning */ #endif + kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index 61088dcc1594..fd7fcd4c2e33 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL(__kmap_atomic); void __kunmap_atomic(void *kvaddr) { - int type = kmap_atomic_idx_pop(); + int type = kmap_atomic_idx(); switch (type) { case 0: __kunmap_atomic_primary(4, 6); break; case 1: __kunmap_atomic_primary(5, 7); break; @@ -83,6 +83,7 @@ void __kunmap_atomic(void *kvaddr) default: BUG(); } + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 1e69b1fb4b85..3634c7ea06ac 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -74,7 +74,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { int idx = type + KM_TYPE_NR * smp_processor_id(); @@ -89,6 +89,7 @@ void __kunmap_atomic(void *kvaddr) local_flush_tlb_one(vaddr); } #endif + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h index f577ba2268ca..e2155e686451 100644 --- a/arch/mn10300/include/asm/highmem.h +++ b/arch/mn10300/include/asm/highmem.h @@ -101,7 +101,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #if HIGHMEM_DEBUG { @@ -119,6 +119,8 @@ static inline void __kunmap_atomic(unsigned long vaddr) __flush_tlb_one(vaddr); } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index b0848b462bbc..e7450bdbe83a 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -62,7 +62,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { @@ -79,6 +79,8 @@ void __kunmap_atomic(void *kvaddr) local_flush_tlb_page(NULL, vaddr); } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 5e50c09b7dce..4730eac0747b 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -75,7 +75,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { @@ -104,6 +104,8 @@ void __kunmap_atomic(void *kvaddr) #endif } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 8ef6595e162c..abb57331cf6e 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -241,7 +241,7 @@ void __kunmap_atomic(void *kvaddr) pte_t pteval = *pte; int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR*smp_processor_id(); /* @@ -252,6 +252,7 @@ void __kunmap_atomic(void *kvaddr) BUG_ON(!pte_present(pteval) && !pte_migrating(pteval)); kmap_atomic_unregister(pte_page(pteval), vaddr); kpte_clear_flush(pte, vaddr); + kmap_atomic_idx_pop(); } else { /* Must be a lowmem page */ BUG_ON(vaddr < PAGE_OFFSET); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index d723e369003c..b49962662101 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -74,7 +74,7 @@ void __kunmap_atomic(void *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -87,6 +87,7 @@ void __kunmap_atomic(void *kvaddr) * attributes or becomes a protected page in a hypervisor. */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } #ifdef CONFIG_DEBUG_HIGHMEM else { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 75a3d7f24a2c..7b179b499fa3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -98,7 +98,7 @@ iounmap_atomic(void __iomem *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -111,6 +111,7 @@ iounmap_atomic(void __iomem *kvaddr) * attributes or becomes a protected page in a hypervisor. */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } pagefault_enable(); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 102f76be90da..e9138198e823 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -88,6 +88,11 @@ static inline int kmap_atomic_idx_push(void) return idx; } +static inline int kmap_atomic_idx(void) +{ + return __get_cpu_var(__kmap_atomic_idx) - 1; +} + static inline int kmap_atomic_idx_pop(void) { int idx = --__get_cpu_var(__kmap_atomic_idx); From d31eb5194bbdcc7db9fd6956c293e45093bfb394 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 27 Oct 2010 15:32:58 -0700 Subject: [PATCH 003/139] tile: enable ARCH_DMA_ADDR_T_64BIT Signed-off-by: FUJITA Tomonori Acked-by: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 89cfee07efa9..7e8c2844e093 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -58,6 +58,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_PHYS_ADDR_T_64BIT def_bool y +config ARCH_DMA_ADDR_T_64BIT + def_bool y + config LOCKDEP_SUPPORT def_bool y From 8881cdceb25b4fcebfb17a9548ed80c22cf8b066 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 27 Oct 2010 15:32:59 -0700 Subject: [PATCH 004/139] dmi: log board, system, and BIOS information Put basic system information in the dmesg log. There are lots of dmesg logs on the web, and it would be useful if they contained this information for debugging platform problems. "BOARD/PRODUCT" format copied from show_regs_common(), which is used in the oops path. Signed-off-by: Bjorn Helgaas Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/dmi_scan.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index b3d22d659990..e28e41668177 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -361,6 +362,33 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy) } } +static void __init print_filtered(const char *info) +{ + const char *p; + + if (!info) + return; + + for (p = info; *p; p++) + if (isprint(*p)) + printk(KERN_CONT "%c", *p); + else + printk(KERN_CONT "\\x%02x", *p & 0xff); +} + +static void __init dmi_dump_ids(void) +{ + printk(KERN_DEBUG "DMI: "); + print_filtered(dmi_get_system_info(DMI_BOARD_NAME)); + printk(KERN_CONT "/"); + print_filtered(dmi_get_system_info(DMI_PRODUCT_NAME)); + printk(KERN_CONT ", BIOS "); + print_filtered(dmi_get_system_info(DMI_BIOS_VERSION)); + printk(KERN_CONT " "); + print_filtered(dmi_get_system_info(DMI_BIOS_DATE)); + printk(KERN_CONT "\n"); +} + static int __init dmi_present(const char __iomem *p) { u8 buf[15]; @@ -381,8 +409,10 @@ static int __init dmi_present(const char __iomem *p) buf[14] >> 4, buf[14] & 0xF); else printk(KERN_INFO "DMI present.\n"); - if (dmi_walk_early(dmi_decode) == 0) + if (dmi_walk_early(dmi_decode) == 0) { + dmi_dump_ids(); return 0; + } } return 1; } From 1e0ad2881d50becaeea70ec696a80afeadf944d2 Mon Sep 17 00:00:00 2001 From: Graham Gower Date: Wed, 27 Oct 2010 15:33:00 -0700 Subject: [PATCH 005/139] drivers/char/vt_ioctl.c: fix VT_OPENQRY error value When all VT's are in use, VT_OPENQRY casts -1 to unsigned char before returning it to userspace as an int. VT255 is not the next available console. Signed-off-by: Graham Gower Cc: Greg KH Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/vt_ioctl.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 38df8c19e74c..6b68a0fb4611 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -503,6 +503,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, struct kbd_struct * kbd; unsigned int console; unsigned char ucval; + unsigned int uival; void __user *up = (void __user *)arg; int i, perm; int ret = 0; @@ -657,7 +658,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, break; case KDGETMODE: - ucval = vc->vc_mode; + uival = vc->vc_mode; goto setint; case KDMAPDISP: @@ -695,7 +696,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, break; case KDGKBMODE: - ucval = ((kbd->kbdmode == VC_RAW) ? K_RAW : + uival = ((kbd->kbdmode == VC_RAW) ? K_RAW : (kbd->kbdmode == VC_MEDIUMRAW) ? K_MEDIUMRAW : (kbd->kbdmode == VC_UNICODE) ? K_UNICODE : K_XLATE); @@ -717,9 +718,9 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, break; case KDGKBMETA: - ucval = (vc_kbd_mode(kbd, VC_META) ? K_ESCPREFIX : K_METABIT); + uival = (vc_kbd_mode(kbd, VC_META) ? K_ESCPREFIX : K_METABIT); setint: - ret = put_user(ucval, (int __user *)arg); + ret = put_user(uival, (int __user *)arg); break; case KDGETKEYCODE: @@ -949,7 +950,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, for (i = 0; i < MAX_NR_CONSOLES; ++i) if (! VT_IS_IN_USE(i)) break; - ucval = i < MAX_NR_CONSOLES ? (i+1) : -1; + uival = i < MAX_NR_CONSOLES ? (i+1) : -1; goto setint; /* From 9aa449bed21515a3406f60238ce4747e4118b628 Mon Sep 17 00:00:00 2001 From: Kevin Wells Date: Wed, 27 Oct 2010 15:33:01 -0700 Subject: [PATCH 006/139] rtc: rtc-lpc32xx: introduce RTC driver for the LPC32XX SoC Add an RTC driver for the built-in RTC in the LPC32XX SoC. This patch includes updates from the initial review comments and updates from the v3 review. Signed-off-by: Kevin Wells Signed-off-by: Durgesh Pattamatta Reviewed-by: Wolfram Sang Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/Kconfig | 9 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-lpc32xx.c | 414 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 424 insertions(+) create mode 100644 drivers/rtc/rtc-lpc32xx.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 2785a0f16c9f..636ba8224884 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -952,4 +952,13 @@ config RTC_DRV_JZ4740 This driver can also be buillt as a module. If so, the module will be called rtc-jz4740. +config RTC_DRV_LPC32XX + depends on ARCH_LPC32XX + tristate "NXP LPC32XX RTC" + help + This enables support for the NXP RTC in the LPC32XX + + This driver can also be buillt as a module. If so, the module + will be called rtc-lpc32xx. + endif # RTC_CLASS diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 0f207b3b5833..7a7cb3228a1d 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_RTC_DRV_IMXDI) += rtc-imxdi.o obj-$(CONFIG_RTC_DRV_ISL1208) += rtc-isl1208.o obj-$(CONFIG_RTC_DRV_ISL12022) += rtc-isl12022.o obj-$(CONFIG_RTC_DRV_JZ4740) += rtc-jz4740.o +obj-$(CONFIG_RTC_DRV_LPC32XX) += rtc-lpc32xx.o obj-$(CONFIG_RTC_DRV_M41T80) += rtc-m41t80.o obj-$(CONFIG_RTC_DRV_M41T94) += rtc-m41t94.o obj-$(CONFIG_RTC_DRV_M48T35) += rtc-m48t35.o diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c new file mode 100644 index 000000000000..ec8701ce99f9 --- /dev/null +++ b/drivers/rtc/rtc-lpc32xx.c @@ -0,0 +1,414 @@ +/* + * Copyright (C) 2010 NXP Semiconductors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Clock and Power control register offsets + */ +#define LPC32XX_RTC_UCOUNT 0x00 +#define LPC32XX_RTC_DCOUNT 0x04 +#define LPC32XX_RTC_MATCH0 0x08 +#define LPC32XX_RTC_MATCH1 0x0C +#define LPC32XX_RTC_CTRL 0x10 +#define LPC32XX_RTC_INTSTAT 0x14 +#define LPC32XX_RTC_KEY 0x18 +#define LPC32XX_RTC_SRAM 0x80 + +#define LPC32XX_RTC_CTRL_MATCH0 (1 << 0) +#define LPC32XX_RTC_CTRL_MATCH1 (1 << 1) +#define LPC32XX_RTC_CTRL_ONSW_MATCH0 (1 << 2) +#define LPC32XX_RTC_CTRL_ONSW_MATCH1 (1 << 3) +#define LPC32XX_RTC_CTRL_SW_RESET (1 << 4) +#define LPC32XX_RTC_CTRL_CNTR_DIS (1 << 6) +#define LPC32XX_RTC_CTRL_ONSW_FORCE_HI (1 << 7) + +#define LPC32XX_RTC_INTSTAT_MATCH0 (1 << 0) +#define LPC32XX_RTC_INTSTAT_MATCH1 (1 << 1) +#define LPC32XX_RTC_INTSTAT_ONSW (1 << 2) + +#define LPC32XX_RTC_KEY_ONSW_LOADVAL 0xB5C13F27 + +#define RTC_NAME "rtc-lpc32xx" + +#define rtc_readl(dev, reg) \ + __raw_readl((dev)->rtc_base + (reg)) +#define rtc_writel(dev, reg, val) \ + __raw_writel((val), (dev)->rtc_base + (reg)) + +struct lpc32xx_rtc { + void __iomem *rtc_base; + int irq; + unsigned char alarm_enabled; + struct rtc_device *rtc; + spinlock_t lock; +}; + +static int lpc32xx_rtc_read_time(struct device *dev, struct rtc_time *time) +{ + unsigned long elapsed_sec; + struct lpc32xx_rtc *rtc = dev_get_drvdata(dev); + + elapsed_sec = rtc_readl(rtc, LPC32XX_RTC_UCOUNT); + rtc_time_to_tm(elapsed_sec, time); + + return rtc_valid_tm(time); +} + +static int lpc32xx_rtc_set_mmss(struct device *dev, unsigned long secs) +{ + struct lpc32xx_rtc *rtc = dev_get_drvdata(dev); + u32 tmp; + + spin_lock_irq(&rtc->lock); + + /* RTC must be disabled during count update */ + tmp = rtc_readl(rtc, LPC32XX_RTC_CTRL); + rtc_writel(rtc, LPC32XX_RTC_CTRL, tmp | LPC32XX_RTC_CTRL_CNTR_DIS); + rtc_writel(rtc, LPC32XX_RTC_UCOUNT, secs); + rtc_writel(rtc, LPC32XX_RTC_DCOUNT, 0xFFFFFFFF - secs); + rtc_writel(rtc, LPC32XX_RTC_CTRL, tmp &= ~LPC32XX_RTC_CTRL_CNTR_DIS); + + spin_unlock_irq(&rtc->lock); + + return 0; +} + +static int lpc32xx_rtc_read_alarm(struct device *dev, + struct rtc_wkalrm *wkalrm) +{ + struct lpc32xx_rtc *rtc = dev_get_drvdata(dev); + + rtc_time_to_tm(rtc_readl(rtc, LPC32XX_RTC_MATCH0), &wkalrm->time); + wkalrm->enabled = rtc->alarm_enabled; + wkalrm->pending = !!(rtc_readl(rtc, LPC32XX_RTC_INTSTAT) & + LPC32XX_RTC_INTSTAT_MATCH0); + + return rtc_valid_tm(&wkalrm->time); +} + +static int lpc32xx_rtc_set_alarm(struct device *dev, + struct rtc_wkalrm *wkalrm) +{ + struct lpc32xx_rtc *rtc = dev_get_drvdata(dev); + unsigned long alarmsecs; + u32 tmp; + int ret; + + ret = rtc_tm_to_time(&wkalrm->time, &alarmsecs); + if (ret < 0) { + dev_warn(dev, "Failed to convert time: %d\n", ret); + return ret; + } + + spin_lock_irq(&rtc->lock); + + /* Disable alarm during update */ + tmp = rtc_readl(rtc, LPC32XX_RTC_CTRL); + rtc_writel(rtc, LPC32XX_RTC_CTRL, tmp & ~LPC32XX_RTC_CTRL_MATCH0); + + rtc_writel(rtc, LPC32XX_RTC_MATCH0, alarmsecs); + + rtc->alarm_enabled = wkalrm->enabled; + if (wkalrm->enabled) { + rtc_writel(rtc, LPC32XX_RTC_INTSTAT, + LPC32XX_RTC_INTSTAT_MATCH0); + rtc_writel(rtc, LPC32XX_RTC_CTRL, tmp | + LPC32XX_RTC_CTRL_MATCH0); + } + + spin_unlock_irq(&rtc->lock); + + return 0; +} + +static int lpc32xx_rtc_alarm_irq_enable(struct device *dev, + unsigned int enabled) +{ + struct lpc32xx_rtc *rtc = dev_get_drvdata(dev); + u32 tmp; + + spin_lock_irq(&rtc->lock); + tmp = rtc_readl(rtc, LPC32XX_RTC_CTRL); + + if (enabled) { + rtc->alarm_enabled = 1; + tmp |= LPC32XX_RTC_CTRL_MATCH0; + } else { + rtc->alarm_enabled = 0; + tmp &= ~LPC32XX_RTC_CTRL_MATCH0; + } + + rtc_writel(rtc, LPC32XX_RTC_CTRL, tmp); + spin_unlock_irq(&rtc->lock); + + return 0; +} + +static irqreturn_t lpc32xx_rtc_alarm_interrupt(int irq, void *dev) +{ + struct lpc32xx_rtc *rtc = dev; + + spin_lock(&rtc->lock); + + /* Disable alarm interrupt */ + rtc_writel(rtc, LPC32XX_RTC_CTRL, + rtc_readl(rtc, LPC32XX_RTC_CTRL) & + ~LPC32XX_RTC_CTRL_MATCH0); + rtc->alarm_enabled = 0; + + /* + * Write a large value to the match value so the RTC won't + * keep firing the match status + */ + rtc_writel(rtc, LPC32XX_RTC_MATCH0, 0xFFFFFFFF); + rtc_writel(rtc, LPC32XX_RTC_INTSTAT, LPC32XX_RTC_INTSTAT_MATCH0); + + spin_unlock(&rtc->lock); + + rtc_update_irq(rtc->rtc, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static const struct rtc_class_ops lpc32xx_rtc_ops = { + .read_time = lpc32xx_rtc_read_time, + .set_mmss = lpc32xx_rtc_set_mmss, + .read_alarm = lpc32xx_rtc_read_alarm, + .set_alarm = lpc32xx_rtc_set_alarm, + .alarm_irq_enable = lpc32xx_rtc_alarm_irq_enable, +}; + +static int __devinit lpc32xx_rtc_probe(struct platform_device *pdev) +{ + struct resource *res; + struct lpc32xx_rtc *rtc; + resource_size_t size; + int rtcirq; + u32 tmp; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Can't get memory resource\n"); + return -ENOENT; + } + + rtcirq = platform_get_irq(pdev, 0); + if (rtcirq < 0 || rtcirq >= NR_IRQS) { + dev_warn(&pdev->dev, "Can't get interrupt resource\n"); + rtcirq = -1; + } + + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); + if (unlikely(!rtc)) { + dev_err(&pdev->dev, "Can't allocate memory\n"); + return -ENOMEM; + } + rtc->irq = rtcirq; + + size = resource_size(res); + + if (!devm_request_mem_region(&pdev->dev, res->start, size, + pdev->name)) { + dev_err(&pdev->dev, "RTC registers are not free\n"); + return -EBUSY; + } + + rtc->rtc_base = devm_ioremap(&pdev->dev, res->start, size); + if (!rtc->rtc_base) { + dev_err(&pdev->dev, "Can't map memory\n"); + return -ENOMEM; + } + + spin_lock_init(&rtc->lock); + + /* + * The RTC is on a seperate power domain and can keep it's state + * across a chip power cycle. If the RTC has never been previously + * setup, then set it up now for the first time. + */ + tmp = rtc_readl(rtc, LPC32XX_RTC_CTRL); + if (rtc_readl(rtc, LPC32XX_RTC_KEY) != LPC32XX_RTC_KEY_ONSW_LOADVAL) { + tmp &= ~(LPC32XX_RTC_CTRL_SW_RESET | + LPC32XX_RTC_CTRL_CNTR_DIS | + LPC32XX_RTC_CTRL_MATCH0 | + LPC32XX_RTC_CTRL_MATCH1 | + LPC32XX_RTC_CTRL_ONSW_MATCH0 | + LPC32XX_RTC_CTRL_ONSW_MATCH1 | + LPC32XX_RTC_CTRL_ONSW_FORCE_HI); + rtc_writel(rtc, LPC32XX_RTC_CTRL, tmp); + + /* Clear latched interrupt states */ + rtc_writel(rtc, LPC32XX_RTC_MATCH0, 0xFFFFFFFF); + rtc_writel(rtc, LPC32XX_RTC_INTSTAT, + LPC32XX_RTC_INTSTAT_MATCH0 | + LPC32XX_RTC_INTSTAT_MATCH1 | + LPC32XX_RTC_INTSTAT_ONSW); + + /* Write key value to RTC so it won't reload on reset */ + rtc_writel(rtc, LPC32XX_RTC_KEY, + LPC32XX_RTC_KEY_ONSW_LOADVAL); + } else { + rtc_writel(rtc, LPC32XX_RTC_CTRL, + tmp & ~LPC32XX_RTC_CTRL_MATCH0); + } + + platform_set_drvdata(pdev, rtc); + + rtc->rtc = rtc_device_register(RTC_NAME, &pdev->dev, &lpc32xx_rtc_ops, + THIS_MODULE); + if (IS_ERR(rtc->rtc)) { + dev_err(&pdev->dev, "Can't get RTC\n"); + platform_set_drvdata(pdev, NULL); + return PTR_ERR(rtc->rtc); + } + + /* + * IRQ is enabled after device registration in case alarm IRQ + * is pending upon suspend exit. + */ + if (rtc->irq >= 0) { + if (devm_request_irq(&pdev->dev, rtc->irq, + lpc32xx_rtc_alarm_interrupt, + IRQF_DISABLED, pdev->name, rtc) < 0) { + dev_warn(&pdev->dev, "Can't request interrupt.\n"); + rtc->irq = -1; + } else { + device_init_wakeup(&pdev->dev, 1); + } + } + + return 0; +} + +static int __devexit lpc32xx_rtc_remove(struct platform_device *pdev) +{ + struct lpc32xx_rtc *rtc = platform_get_drvdata(pdev); + + if (rtc->irq >= 0) + device_init_wakeup(&pdev->dev, 0); + + platform_set_drvdata(pdev, NULL); + rtc_device_unregister(rtc->rtc); + + return 0; +} + +#ifdef CONFIG_PM +static int lpc32xx_rtc_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct lpc32xx_rtc *rtc = platform_get_drvdata(pdev); + + if (rtc->irq >= 0) { + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(rtc->irq); + else + disable_irq_wake(rtc->irq); + } + + return 0; +} + +static int lpc32xx_rtc_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct lpc32xx_rtc *rtc = platform_get_drvdata(pdev); + + if (rtc->irq >= 0 && device_may_wakeup(&pdev->dev)) + disable_irq_wake(rtc->irq); + + return 0; +} + +/* Unconditionally disable the alarm */ +static int lpc32xx_rtc_freeze(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct lpc32xx_rtc *rtc = platform_get_drvdata(pdev); + + spin_lock_irq(&rtc->lock); + + rtc_writel(rtc, LPC32XX_RTC_CTRL, + rtc_readl(rtc, LPC32XX_RTC_CTRL) & + ~LPC32XX_RTC_CTRL_MATCH0); + + spin_unlock_irq(&rtc->lock); + + return 0; +} + +static int lpc32xx_rtc_thaw(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct lpc32xx_rtc *rtc = platform_get_drvdata(pdev); + + if (rtc->alarm_enabled) { + spin_lock_irq(&rtc->lock); + + rtc_writel(rtc, LPC32XX_RTC_CTRL, + rtc_readl(rtc, LPC32XX_RTC_CTRL) | + LPC32XX_RTC_CTRL_MATCH0); + + spin_unlock_irq(&rtc->lock); + } + + return 0; +} + +static const struct dev_pm_ops lpc32xx_rtc_pm_ops = { + .suspend = lpc32xx_rtc_suspend, + .resume = lpc32xx_rtc_resume, + .freeze = lpc32xx_rtc_freeze, + .thaw = lpc32xx_rtc_thaw, + .restore = lpc32xx_rtc_resume +}; + +#define LPC32XX_RTC_PM_OPS (&lpc32xx_rtc_pm_ops) +#else +#define LPC32XX_RTC_PM_OPS NULL +#endif + +static struct platform_driver lpc32xx_rtc_driver = { + .probe = lpc32xx_rtc_probe, + .remove = __devexit_p(lpc32xx_rtc_remove), + .driver = { + .name = RTC_NAME, + .owner = THIS_MODULE, + .pm = LPC32XX_RTC_PM_OPS + }, +}; + +static int __init lpc32xx_rtc_init(void) +{ + return platform_driver_register(&lpc32xx_rtc_driver); +} +module_init(lpc32xx_rtc_init); + +static void __exit lpc32xx_rtc_exit(void) +{ + platform_driver_unregister(&lpc32xx_rtc_driver); +} +module_exit(lpc32xx_rtc_exit); + +MODULE_AUTHOR("Kevin Wells Date: Wed, 27 Oct 2010 15:33:03 -0700 Subject: [PATCH 007/139] rtc-bfin: shrink/optimize interrupt handler a bit By unifying the RTC_ISTAT clearing steps, we shrink the interrupt handler and avoid multiple writes to the hardware registers. Signed-off-by: Mike Frysinger Acked-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-bfin.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index d4fb82d85e9b..3506f330e97e 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -183,29 +183,33 @@ static irqreturn_t bfin_rtc_interrupt(int irq, void *dev_id) struct bfin_rtc *rtc = dev_get_drvdata(dev); unsigned long events = 0; bool write_complete = false; - u16 rtc_istat, rtc_ictl; + u16 rtc_istat, rtc_istat_clear, rtc_ictl, bits; dev_dbg_stamp(dev); rtc_istat = bfin_read_RTC_ISTAT(); rtc_ictl = bfin_read_RTC_ICTL(); + rtc_istat_clear = 0; - if (rtc_istat & RTC_ISTAT_WRITE_COMPLETE) { - bfin_write_RTC_ISTAT(RTC_ISTAT_WRITE_COMPLETE); + bits = RTC_ISTAT_WRITE_COMPLETE; + if (rtc_istat & bits) { + rtc_istat_clear |= bits; write_complete = true; complete(&bfin_write_complete); } - if (rtc_ictl & (RTC_ISTAT_ALARM | RTC_ISTAT_ALARM_DAY)) { - if (rtc_istat & (RTC_ISTAT_ALARM | RTC_ISTAT_ALARM_DAY)) { - bfin_write_RTC_ISTAT(RTC_ISTAT_ALARM | RTC_ISTAT_ALARM_DAY); + bits = (RTC_ISTAT_ALARM | RTC_ISTAT_ALARM_DAY); + if (rtc_ictl & bits) { + if (rtc_istat & bits) { + rtc_istat_clear |= bits; events |= RTC_AF | RTC_IRQF; } } - if (rtc_ictl & RTC_ISTAT_SEC) { - if (rtc_istat & RTC_ISTAT_SEC) { - bfin_write_RTC_ISTAT(RTC_ISTAT_SEC); + bits = RTC_ISTAT_SEC; + if (rtc_ictl & bits) { + if (rtc_istat & bits) { + rtc_istat_clear |= bits; events |= RTC_UF | RTC_IRQF; } } @@ -213,9 +217,10 @@ static irqreturn_t bfin_rtc_interrupt(int irq, void *dev_id) if (events) rtc_update_irq(rtc->rtc_dev, 1, events); - if (write_complete || events) + if (write_complete || events) { + bfin_write_RTC_ISTAT(rtc_istat_clear); return IRQ_HANDLED; - else + } else return IRQ_NONE; } From d7c7ef908b6497bb871e2e113e66e8fb0f757543 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 27 Oct 2010 15:33:04 -0700 Subject: [PATCH 008/139] rtc-bfin: add debug markers to suspend/resume paths The rest of the driver had debug markings already. This also standardizes the usage of "dev" a bit. Signed-off-by: Mike Frysinger Acked-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-bfin.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index 3506f330e97e..b4b6087f2234 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -2,7 +2,7 @@ * Blackfin On-Chip Real Time Clock Driver * Supports BF51x/BF52x/BF53[123]/BF53[467]/BF54x * - * Copyright 2004-2009 Analog Devices Inc. + * Copyright 2004-2010 Analog Devices Inc. * * Enter bugs at http://blackfin.uclinux.org/ * @@ -427,9 +427,13 @@ static int __devexit bfin_rtc_remove(struct platform_device *pdev) #ifdef CONFIG_PM static int bfin_rtc_suspend(struct platform_device *pdev, pm_message_t state) { - if (device_may_wakeup(&pdev->dev)) { + struct device *dev = &pdev->dev; + + dev_dbg_stamp(dev); + + if (device_may_wakeup(dev)) { enable_irq_wake(IRQ_RTC); - bfin_rtc_sync_pending(&pdev->dev); + bfin_rtc_sync_pending(dev); } else bfin_rtc_int_clear(0); @@ -438,7 +442,11 @@ static int bfin_rtc_suspend(struct platform_device *pdev, pm_message_t state) static int bfin_rtc_resume(struct platform_device *pdev) { - if (device_may_wakeup(&pdev->dev)) + struct device *dev = &pdev->dev; + + dev_dbg_stamp(dev); + + if (device_may_wakeup(dev)) disable_irq_wake(IRQ_RTC); /* From 59cca865f21e9e7beab73fcf79ba4eb776a4c228 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Wed, 27 Oct 2010 15:33:04 -0700 Subject: [PATCH 009/139] drivers/rtc/class.c: fix device_register() error handling If device_register() fails then call put_device(). See comment to device_register. Signed-off-by: Vasiliy Kulikov Cc: Alessandro Zummo Cc: Wan ZongShun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/class.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 565562ba6ac9..e6539cbabb35 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -158,8 +158,10 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, rtc_dev_prepare(rtc); err = device_register(&rtc->dev); - if (err) + if (err) { + put_device(&rtc->dev); goto exit_kfree; + } rtc_dev_add_device(rtc); rtc_sysfs_add_device(rtc); From fa5b07820fe3a0fc06ac368516e71f10a59b9539 Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Wed, 27 Oct 2010 15:33:05 -0700 Subject: [PATCH 010/139] rtc: omap: let device wakeup capability be configured from chip init logic The rtc-omap driver currently hardcodes the RTC wakeup capability to be "not capable". While this seems to be true for existing OMAP1 boards which are not wired for this, the DA850/OMAP-L138 SoC, the RTC can always be wake up source from its "deep sleep" mode. This patch lets the wakeup capability be set from platform data and does not override the setting from the driver. For DA850/OMAP-L138, this is done from arch/arm/mach-davinci/devices-da8xx.c:da8xx_register_rtc() Note that this patch does not change the behavior on any existing OMAP1 board since the platform device registration sets the wakeup capability to 0 by default. Signed-off-by: Sekhar Nori Signed-off-by: Kevin Hilman Cc: Alessandro Zummo Cc: Wan ZongShun Cc: Tony Lindgren Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-omap.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index 64d9727b7229..73377b0d65da 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -34,7 +34,8 @@ * Board-specific wiring options include using split power mode with * RTC_OFF_NOFF used as the reset signal (so the RTC won't be reset), * and wiring RTC_WAKE_INT (so the RTC alarm can wake the system from - * low power modes). See the BOARD-SPECIFIC CUSTOMIZATION comment. + * low power modes) for OMAP1 boards (OMAP-L138 has this built into + * the SoC). See the BOARD-SPECIFIC CUSTOMIZATION comment. */ #define OMAP_RTC_BASE 0xfffb4800 @@ -401,16 +402,17 @@ static int __init omap_rtc_probe(struct platform_device *pdev) /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE: * - * - Boards wired so that RTC_WAKE_INT does something, and muxed - * right (W13_1610_RTC_WAKE_INT is the default after chip reset), - * should initialize the device wakeup flag appropriately. + * - Device wake-up capability setting should come through chip + * init logic. OMAP1 boards should initialize the "wakeup capable" + * flag in the platform device if the board is wired right for + * being woken up by RTC alarm. For OMAP-L138, this capability + * is built into the SoC by the "Deep Sleep" capability. * * - Boards wired so RTC_ON_nOFF is used as the reset signal, * rather than nPWRON_RESET, should forcibly enable split * power mode. (Some chip errata report that RTC_CTRL_SPLIT * is write-only, and always reads as zero...) */ - device_init_wakeup(&pdev->dev, 0); if (new_ctrl & (u8) OMAP_RTC_CTRL_SPLIT) pr_info("%s: split power mode\n", pdev->name); From f61ae6711d69717558e882a78487527705603a74 Mon Sep 17 00:00:00 2001 From: Changhwan Youn Date: Wed, 27 Oct 2010 15:33:06 -0700 Subject: [PATCH 011/139] rtc: rtc-s3c: fix access unit from byte to word on RTCCON S3C2410_RTCCON of TYPE_S3C64XX RTC should be read/written by readw and writew, because TYPE_S3C64XX RTC uses bit 8 and 9. And TYPE_S3C2410 RTC also can access it by readw and writew. [atul.dahiya@samsung.com: tested on smdk2416] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Changhwan Youn Signed-off-by: Kukjin Kim Tested-by: Atul Dahiya Cc: Ben Dooks Cc: Wan ZongShun Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-s3c.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index f57a87f4ae96..a87982d7cb70 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -100,7 +100,7 @@ static int s3c_rtc_setpie(struct device *dev, int enabled) spin_lock_irq(&s3c_rtc_pie_lock); if (s3c_rtc_cpu_type == TYPE_S3C64XX) { - tmp = readb(s3c_rtc_base + S3C2410_RTCCON); + tmp = readw(s3c_rtc_base + S3C2410_RTCCON); tmp &= ~S3C64XX_RTCCON_TICEN; if (enabled) @@ -318,7 +318,7 @@ static int s3c_rtc_proc(struct device *dev, struct seq_file *seq) unsigned int ticnt; if (s3c_rtc_cpu_type == TYPE_S3C64XX) { - ticnt = readb(s3c_rtc_base + S3C2410_RTCCON); + ticnt = readw(s3c_rtc_base + S3C2410_RTCCON); ticnt &= S3C64XX_RTCCON_TICEN; } else { ticnt = readb(s3c_rtc_base + S3C2410_TICNT); @@ -391,11 +391,11 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en) return; if (!en) { - tmp = readb(base + S3C2410_RTCCON); + tmp = readw(base + S3C2410_RTCCON); if (s3c_rtc_cpu_type == TYPE_S3C64XX) tmp &= ~S3C64XX_RTCCON_TICEN; tmp &= ~S3C2410_RTCCON_RTCEN; - writeb(tmp, base + S3C2410_RTCCON); + writew(tmp, base + S3C2410_RTCCON); if (s3c_rtc_cpu_type == TYPE_S3C2410) { tmp = readb(base + S3C2410_TICNT); @@ -405,25 +405,28 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en) } else { /* re-enable the device, and check it is ok */ - if ((readb(base+S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){ + if ((readw(base+S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0) { dev_info(&pdev->dev, "rtc disabled, re-enabling\n"); - tmp = readb(base + S3C2410_RTCCON); - writeb(tmp|S3C2410_RTCCON_RTCEN, base+S3C2410_RTCCON); + tmp = readw(base + S3C2410_RTCCON); + writew(tmp | S3C2410_RTCCON_RTCEN, + base + S3C2410_RTCCON); } - if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){ + if ((readw(base + S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)) { dev_info(&pdev->dev, "removing RTCCON_CNTSEL\n"); - tmp = readb(base + S3C2410_RTCCON); - writeb(tmp& ~S3C2410_RTCCON_CNTSEL, base+S3C2410_RTCCON); + tmp = readw(base + S3C2410_RTCCON); + writew(tmp & ~S3C2410_RTCCON_CNTSEL, + base + S3C2410_RTCCON); } - if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){ + if ((readw(base + S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)) { dev_info(&pdev->dev, "removing RTCCON_CLKRST\n"); - tmp = readb(base + S3C2410_RTCCON); - writeb(tmp & ~S3C2410_RTCCON_CLKRST, base+S3C2410_RTCCON); + tmp = readw(base + S3C2410_RTCCON); + writew(tmp & ~S3C2410_RTCCON_CLKRST, + base + S3C2410_RTCCON); } } } @@ -514,8 +517,8 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev) s3c_rtc_enable(pdev, 1); - pr_debug("s3c2410_rtc: RTCCON=%02x\n", - readb(s3c_rtc_base + S3C2410_RTCCON)); + pr_debug("s3c2410_rtc: RTCCON=%02x\n", + readw(s3c_rtc_base + S3C2410_RTCCON)); device_init_wakeup(&pdev->dev, 1); @@ -578,7 +581,7 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) /* save TICNT for anyone using periodic interrupts */ ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT); if (s3c_rtc_cpu_type == TYPE_S3C64XX) { - ticnt_en_save = readb(s3c_rtc_base + S3C2410_RTCCON); + ticnt_en_save = readw(s3c_rtc_base + S3C2410_RTCCON); ticnt_en_save &= S3C64XX_RTCCON_TICEN; } s3c_rtc_enable(pdev, 0); @@ -596,8 +599,8 @@ static int s3c_rtc_resume(struct platform_device *pdev) s3c_rtc_enable(pdev, 1); writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT); if (s3c_rtc_cpu_type == TYPE_S3C64XX && ticnt_en_save) { - tmp = readb(s3c_rtc_base + S3C2410_RTCCON); - writeb(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON); + tmp = readw(s3c_rtc_base + S3C2410_RTCCON); + writew(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON); } if (device_may_wakeup(&pdev->dev)) From dd061d1abe4e637bf755865f776f8088dacd1c0b Mon Sep 17 00:00:00 2001 From: Changhwan Youn Date: Wed, 27 Oct 2010 15:33:08 -0700 Subject: [PATCH 012/139] rtc: rtc-s3c: fix setting missing field of getalarm Current s3c_rtc_getalarm() sets missing field of alarm time with 0xff. But this value should be -1 according to drivers/rtc/interface.c. Signed-off-by: Changhwan Youn Signed-off-by: Kukjin Kim Acked-by: Ben Dooks Cc: Wan ZongShun Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-s3c.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index a87982d7cb70..75b009a17e5b 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -242,34 +242,34 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) if (alm_en & S3C2410_RTCALM_SECEN) alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec); else - alm_tm->tm_sec = 0xff; + alm_tm->tm_sec = -1; if (alm_en & S3C2410_RTCALM_MINEN) alm_tm->tm_min = bcd2bin(alm_tm->tm_min); else - alm_tm->tm_min = 0xff; + alm_tm->tm_min = -1; if (alm_en & S3C2410_RTCALM_HOUREN) alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour); else - alm_tm->tm_hour = 0xff; + alm_tm->tm_hour = -1; if (alm_en & S3C2410_RTCALM_DAYEN) alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday); else - alm_tm->tm_mday = 0xff; + alm_tm->tm_mday = -1; if (alm_en & S3C2410_RTCALM_MONEN) { alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon); alm_tm->tm_mon -= 1; } else { - alm_tm->tm_mon = 0xff; + alm_tm->tm_mon = -1; } if (alm_en & S3C2410_RTCALM_YEAREN) alm_tm->tm_year = bcd2bin(alm_tm->tm_year); else - alm_tm->tm_year = 0xffff; + alm_tm->tm_year = -1; return 0; } From e6eb524e6e6df4027530b36f76b84a6a076a3249 Mon Sep 17 00:00:00 2001 From: Changhwan Youn Date: Wed, 27 Oct 2010 15:33:09 -0700 Subject: [PATCH 013/139] rtc: rtc-s3c: fix on support RTC Alarm The alarm_irq_enable function should be implemented to support RTC alarm. And fix tabs instead of white space around the proc field. Signed-off-by: Changhwan Youn Signed-off-by: Kukjin Kim Acked-by: Ben Dooks Cc: Wan ZongShun Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-s3c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 75b009a17e5b..a53c27f30745 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -379,7 +379,8 @@ static const struct rtc_class_ops s3c_rtcops = { .set_alarm = s3c_rtc_setalarm, .irq_set_freq = s3c_rtc_setfreq, .irq_set_state = s3c_rtc_setpie, - .proc = s3c_rtc_proc, + .proc = s3c_rtc_proc, + .alarm_irq_enable = s3c_rtc_setaie, }; static void s3c_rtc_enable(struct platform_device *pdev, int en) From 30ffc40cf52cd68782b50cb699b5eca076ca23ab Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Wed, 27 Oct 2010 15:33:09 -0700 Subject: [PATCH 014/139] rtc: rtc-s3c: Fix debug message format on RTC Fix debug message format. Signed-off-by: Kukjin Kim Acked-by: Ben Dooks Cc: Wan ZongShun Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-s3c.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index a53c27f30745..ead217b1857a 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -171,8 +171,8 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) goto retry_get_time; } - pr_debug("read time %02x.%02x.%02x %02x/%02x/%02x\n", - rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday, + pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n", + 1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday, rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec); rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec); @@ -193,8 +193,8 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) void __iomem *base = s3c_rtc_base; int year = tm->tm_year - 100; - pr_debug("set time %02d.%02d.%02d %02d/%02d/%02d\n", - tm->tm_year, tm->tm_mon, tm->tm_mday, + pr_debug("set time %04d.%02d.%02d %02d:%02d:%02d\n", + 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); /* we get around y2k by simply not supporting it */ @@ -231,9 +231,9 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->enabled = (alm_en & S3C2410_RTCALM_ALMEN) ? 1 : 0; - pr_debug("read alarm %02x %02x.%02x.%02x %02x/%02x/%02x\n", + pr_debug("read alarm %d, %04d.%02d.%02d %02d:%02d:%02d\n", alm_en, - alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday, + 1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday, alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec); @@ -280,10 +280,10 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) void __iomem *base = s3c_rtc_base; unsigned int alrm_en; - pr_debug("s3c_rtc_setalarm: %d, %02x/%02x/%02x %02x.%02x.%02x\n", + pr_debug("s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n", alrm->enabled, - tm->tm_mday & 0xff, tm->tm_mon & 0xff, tm->tm_year & 0xff, - tm->tm_hour & 0xff, tm->tm_min & 0xff, tm->tm_sec); + 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN; From e1df962e6cdc431acb6a8da409b6a7d89c4f782e Mon Sep 17 00:00:00 2001 From: Changhwan Youn Date: Wed, 27 Oct 2010 15:33:10 -0700 Subject: [PATCH 015/139] rtc: rtc-s3c: fix RTC initialization method Change RTC initialization method in probe(). The 'rtc_valid_tm(tm)' can check whether RTC BCD is valid or not. And change the method of checking because the previous method cannot validate RTC BCD registers properly. Signed-off-by: Changhwan Youn Signed-off-by: Kukjin Kim Cc: Ben Dooks Cc: Wan ZongShun Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-s3c.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index ead217b1857a..d0498a223f40 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -456,8 +456,8 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev) static int __devinit s3c_rtc_probe(struct platform_device *pdev) { struct rtc_device *rtc; + struct rtc_time rtc_tm; struct resource *res; - unsigned int tmp, i; int ret; pr_debug("%s: probe=%p\n", __func__, pdev); @@ -538,11 +538,19 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev) /* Check RTC Time */ - for (i = S3C2410_RTCSEC; i <= S3C2410_RTCYEAR; i += 0x4) { - tmp = readb(s3c_rtc_base + i); + s3c_rtc_gettime(NULL, &rtc_tm); - if ((tmp & 0xf) > 0x9 || ((tmp >> 4) & 0xf) > 0x9) - writeb(0, s3c_rtc_base + i); + if (rtc_valid_tm(&rtc_tm)) { + rtc_tm.tm_year = 100; + rtc_tm.tm_mon = 0; + rtc_tm.tm_mday = 1; + rtc_tm.tm_hour = 0; + rtc_tm.tm_min = 0; + rtc_tm.tm_sec = 0; + + s3c_rtc_settime(NULL, &rtc_tm); + + dev_warn(&pdev->dev, "warning: invalid RTC value so initializing it\n"); } if (s3c_rtc_cpu_type == TYPE_S3C64XX) From 5b3ffddd8ddb4712dfe14ad3e23eb5494f11bf61 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Wed, 27 Oct 2010 15:33:11 -0700 Subject: [PATCH 016/139] rtc: rtc-s3c: add rtc_valid_tm in s3c_rtc_gettime() Add "rtc_valid_tm" in s3c_rtc_gettime() as per Wan ZongShun's suggestion. Suggested-by: Wan ZongShun Signed-off-by: Kukjin Kim Cc: Ben Dooks Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-s3c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index d0498a223f40..cf953ecbfca9 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -185,7 +185,7 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) rtc_tm->tm_year += 100; rtc_tm->tm_mon -= 1; - return 0; + return rtc_valid_tm(rtc_tm); } static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) From f46418c5cadfe76b15c630ff746ca859a8207d71 Mon Sep 17 00:00:00 2001 From: Lan Chunhe-B25806 Date: Wed, 27 Oct 2010 15:33:12 -0700 Subject: [PATCH 017/139] drivers/rtc/rtc-ds3232.c: add alarm function The DS3232 RTC driver only has the tick function. Add an alarm function so the driver is complete. Signed-off-by: Lan Chunhe-B25806 Cc: Alessandro Zummo Cc: Wan ZongShun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/Kconfig | 3 +- drivers/rtc/rtc-ds3232.c | 181 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 636ba8224884..6a77437d4f5a 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -171,7 +171,8 @@ config RTC_DRV_DS3232 depends on RTC_CLASS && I2C help If you say yes here you get support for Dallas Semiconductor - DS3232 real-time clock chips. + DS3232 real-time clock chips. If an interrupt is associated + with the device, the alarm functionality is supported. This driver can also be built as a module. If so, the module will be called rtc-ds3232. diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index 9de8516e3531..57063552d3b7 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -2,6 +2,7 @@ * RTC client/driver for the Maxim/Dallas DS3232 Real-Time Clock over I2C * * Copyright (C) 2009-2010 Freescale Semiconductor. + * Author: Jack Lan * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -175,6 +176,182 @@ static int ds3232_set_time(struct device *dev, struct rtc_time *time) DS3232_REG_SECONDS, 7, buf); } +/* + * DS3232 has two alarm, we only use alarm1 + * According to linux specification, only support one-shot alarm + * no periodic alarm mode + */ +static int ds3232_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds3232 *ds3232 = i2c_get_clientdata(client); + int control, stat; + int ret; + u8 buf[4]; + + mutex_lock(&ds3232->mutex); + + ret = i2c_smbus_read_byte_data(client, DS3232_REG_SR); + if (ret < 0) + goto out; + stat = ret; + ret = i2c_smbus_read_byte_data(client, DS3232_REG_CR); + if (ret < 0) + goto out; + control = ret; + ret = i2c_smbus_read_i2c_block_data(client, DS3232_REG_ALARM1, 4, buf); + if (ret < 0) + goto out; + + alarm->time.tm_sec = bcd2bin(buf[0] & 0x7F); + alarm->time.tm_min = bcd2bin(buf[1] & 0x7F); + alarm->time.tm_hour = bcd2bin(buf[2] & 0x7F); + alarm->time.tm_mday = bcd2bin(buf[3] & 0x7F); + + alarm->time.tm_mon = -1; + alarm->time.tm_year = -1; + alarm->time.tm_wday = -1; + alarm->time.tm_yday = -1; + alarm->time.tm_isdst = -1; + + alarm->enabled = !!(control & DS3232_REG_CR_A1IE); + alarm->pending = !!(stat & DS3232_REG_SR_A1F); + + ret = 0; +out: + mutex_unlock(&ds3232->mutex); + return ret; +} + +/* + * linux rtc-module does not support wday alarm + * and only 24h time mode supported indeed + */ +static int ds3232_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds3232 *ds3232 = i2c_get_clientdata(client); + int control, stat; + int ret; + u8 buf[4]; + + if (client->irq <= 0) + return -EINVAL; + + mutex_lock(&ds3232->mutex); + + buf[0] = bin2bcd(alarm->time.tm_sec); + buf[1] = bin2bcd(alarm->time.tm_min); + buf[2] = bin2bcd(alarm->time.tm_hour); + buf[3] = bin2bcd(alarm->time.tm_mday); + + /* clear alarm interrupt enable bit */ + ret = i2c_smbus_read_byte_data(client, DS3232_REG_CR); + if (ret < 0) + goto out; + control = ret; + control &= ~(DS3232_REG_CR_A1IE | DS3232_REG_CR_A2IE); + ret = i2c_smbus_write_byte_data(client, DS3232_REG_CR, control); + if (ret < 0) + goto out; + + /* clear any pending alarm flag */ + ret = i2c_smbus_read_byte_data(client, DS3232_REG_SR); + if (ret < 0) + goto out; + stat = ret; + stat &= ~(DS3232_REG_SR_A1F | DS3232_REG_SR_A2F); + ret = i2c_smbus_write_byte_data(client, DS3232_REG_SR, stat); + if (ret < 0) + goto out; + + ret = i2c_smbus_write_i2c_block_data(client, DS3232_REG_ALARM1, 4, buf); + + if (alarm->enabled) { + control |= DS3232_REG_CR_A1IE; + ret = i2c_smbus_write_byte_data(client, DS3232_REG_CR, control); + } +out: + mutex_unlock(&ds3232->mutex); + return ret; +} + +static void ds3232_update_alarm(struct i2c_client *client) +{ + struct ds3232 *ds3232 = i2c_get_clientdata(client); + int control; + int ret; + u8 buf[4]; + + mutex_lock(&ds3232->mutex); + + ret = i2c_smbus_read_i2c_block_data(client, DS3232_REG_ALARM1, 4, buf); + if (ret < 0) + goto unlock; + + buf[0] = bcd2bin(buf[0]) < 0 || (ds3232->rtc->irq_data & RTC_UF) ? + 0x80 : buf[0]; + buf[1] = bcd2bin(buf[1]) < 0 || (ds3232->rtc->irq_data & RTC_UF) ? + 0x80 : buf[1]; + buf[2] = bcd2bin(buf[2]) < 0 || (ds3232->rtc->irq_data & RTC_UF) ? + 0x80 : buf[2]; + buf[3] = bcd2bin(buf[3]) < 0 || (ds3232->rtc->irq_data & RTC_UF) ? + 0x80 : buf[3]; + + ret = i2c_smbus_write_i2c_block_data(client, DS3232_REG_ALARM1, 4, buf); + if (ret < 0) + goto unlock; + + control = i2c_smbus_read_byte_data(client, DS3232_REG_CR); + if (control < 0) + goto unlock; + + if (ds3232->rtc->irq_data & (RTC_AF | RTC_UF)) + /* enable alarm1 interrupt */ + control |= DS3232_REG_CR_A1IE; + else + /* disable alarm1 interrupt */ + control &= ~(DS3232_REG_CR_A1IE); + i2c_smbus_write_byte_data(client, DS3232_REG_CR, control); + +unlock: + mutex_unlock(&ds3232->mutex); +} + +static int ds3232_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds3232 *ds3232 = i2c_get_clientdata(client); + + if (client->irq <= 0) + return -EINVAL; + + if (enabled) + ds3232->rtc->irq_data |= RTC_AF; + else + ds3232->rtc->irq_data &= ~RTC_AF; + + ds3232_update_alarm(client); + return 0; +} + +static int ds3232_update_irq_enable(struct device *dev, unsigned int enabled) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds3232 *ds3232 = i2c_get_clientdata(client); + + if (client->irq <= 0) + return -EINVAL; + + if (enabled) + ds3232->rtc->irq_data |= RTC_UF; + else + ds3232->rtc->irq_data &= ~RTC_UF; + + ds3232_update_alarm(client); + return 0; +} + static irqreturn_t ds3232_irq(int irq, void *dev_id) { struct i2c_client *client = dev_id; @@ -222,6 +399,10 @@ unlock: static const struct rtc_class_ops ds3232_rtc_ops = { .read_time = ds3232_read_time, .set_time = ds3232_set_time, + .read_alarm = ds3232_read_alarm, + .set_alarm = ds3232_set_alarm, + .alarm_irq_enable = ds3232_alarm_irq_enable, + .update_irq_enable = ds3232_update_irq_enable, }; static int __devinit ds3232_probe(struct i2c_client *client, From d0f744c8cbd19a8d07eccb15bb08e6a29c4d5192 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 27 Oct 2010 15:33:12 -0700 Subject: [PATCH 018/139] drivers/rtc/rtc-jz4740.c: add alarm function Add the "alarm" function to the jz4740 RTC. Interrupts will now be raised when the "alarm" time is reached. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Paul Cercueil Cc: Wan ZongShun Cc: Alessandro Zummo Cc: Lars-Peter Clausen Cc: Paul Gortmaker Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-jz4740.c | 45 +++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index 2619d57b91d7..2e16f72c9056 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009-2010, Lars-Peter Clausen + * Copyright (C) 2010, Paul Cercueil * JZ4740 SoC RTC driver * * This program is free software; you can redistribute it and/or modify it @@ -161,7 +162,8 @@ static int jz4740_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) ret = jz4740_rtc_reg_write(rtc, JZ_REG_RTC_SEC_ALARM, secs); if (!ret) - ret = jz4740_rtc_ctrl_set_bits(rtc, JZ_RTC_CTRL_AE, alrm->enabled); + ret = jz4740_rtc_ctrl_set_bits(rtc, + JZ_RTC_CTRL_AE | JZ_RTC_CTRL_AF_IRQ, alrm->enabled); return ret; } @@ -258,6 +260,8 @@ static int __devinit jz4740_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); + device_init_wakeup(&pdev->dev, 1); + rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, &jz4740_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { @@ -318,12 +322,43 @@ static int __devexit jz4740_rtc_remove(struct platform_device *pdev) return 0; } + +#ifdef CONFIG_PM +static int jz4740_rtc_suspend(struct device *dev) +{ + struct jz4740_rtc *rtc = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + enable_irq_wake(rtc->irq); + return 0; +} + +static int jz4740_rtc_resume(struct device *dev) +{ + struct jz4740_rtc *rtc = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + disable_irq_wake(rtc->irq); + return 0; +} + +static const struct dev_pm_ops jz4740_pm_ops = { + .suspend = jz4740_rtc_suspend, + .resume = jz4740_rtc_resume, +}; +#define JZ4740_RTC_PM_OPS (&jz4740_pm_ops) + +#else +#define JZ4740_RTC_PM_OPS NULL +#endif /* CONFIG_PM */ + struct platform_driver jz4740_rtc_driver = { - .probe = jz4740_rtc_probe, - .remove = __devexit_p(jz4740_rtc_remove), - .driver = { - .name = "jz4740-rtc", + .probe = jz4740_rtc_probe, + .remove = __devexit_p(jz4740_rtc_remove), + .driver = { + .name = "jz4740-rtc", .owner = THIS_MODULE, + .pm = JZ4740_RTC_PM_OPS, }, }; From aeec56e331c6d2750de02ef34b305338305ca690 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Oct 2010 15:33:15 -0700 Subject: [PATCH 019/139] gpio: add driver for basic memory-mapped GPIO controllers The basic GPIO controllers may be found in various on-board FPGA and ASIC solutions that are used to control board's switches, LEDs, chip-selects, Ethernet/USB PHY power, etc. These controllers may not provide any means of pin setup (in/out/open drain). The driver supports: - 8/16/32/64 bits registers; - GPIO controllers with clear/set registers; - GPIO controllers with a single "data" register; - Big endian bits/GPIOs ordering (mostly used on PowerPC). Signed-off-by: Anton Vorontsov Reviewed-by: Mark Brown Cc: David Brownell Cc: Samuel Ortiz , Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 5 + drivers/gpio/Makefile | 1 + drivers/gpio/basic_mmio_gpio.c | 297 ++++++++++++++++++++++++++++++++ include/linux/basic_mmio_gpio.h | 20 +++ 4 files changed, 323 insertions(+) create mode 100644 drivers/gpio/basic_mmio_gpio.c create mode 100644 include/linux/basic_mmio_gpio.h diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 510aa2054544..e47ef94be379 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -70,6 +70,11 @@ config GPIO_MAX730X comment "Memory mapped GPIO expanders:" +config GPIO_BASIC_MMIO + tristate "Basic memory-mapped GPIO controllers support" + help + Say yes here to support basic memory-mapped GPIO controllers. + config GPIO_IT8761E tristate "IT8761E GPIO support" depends on GPIOLIB diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index fc6019d93720..3ff0651fd173 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o +obj-$(CONFIG_GPIO_BASIC_MMIO) += basic_mmio_gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX730X) += max730x.o obj-$(CONFIG_GPIO_MAX7300) += max7300.o diff --git a/drivers/gpio/basic_mmio_gpio.c b/drivers/gpio/basic_mmio_gpio.c new file mode 100644 index 000000000000..3addea65894e --- /dev/null +++ b/drivers/gpio/basic_mmio_gpio.c @@ -0,0 +1,297 @@ +/* + * Driver for basic memory-mapped GPIO controllers. + * + * Copyright 2008 MontaVista Software, Inc. + * Copyright 2008,2010 Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ....``.```~~~~````.`.`.`.`.```````'',,,.........`````......`....... + * ...`` ```````.. + * ..The simplest form of a GPIO controller that the driver supports is`` + * `.just a single "data" register, where GPIO state can be read and/or ` + * `,..written. ,,..``~~~~ .....``.`.`.~~.```.`.........``````.``````` + * ````````` + ___ +_/~~|___/~| . ```~~~~~~ ___/___\___ ,~.`.`.`.`````.~~...,,,,... +__________|~$@~~~ %~ /o*o*o*o*o*o\ .. Implementing such a GPIO . +o ` ~~~~\___/~~~~ ` controller in FPGA is ,.` + `....trivial..'~`.```.``` + * ``````` + * .```````~~~~`..`.``.``. + * . The driver supports `... ,..```.`~~~```````````````....````.``,, + * . big-endian notation, just`. .. A bit more sophisticated controllers , + * . register the device with -be`. .with a pair of set/clear-bit registers , + * `.. suffix. ```~~`````....`.` . affecting the data register and the .` + * ``.`.``...``` ```.. output pins are also supported.` + * ^^ `````.`````````.,``~``~``~~`````` + * . ^^ + * ,..`.`.`...````````````......`.`.`.`.`.`..`.`.`.. + * .. The expectation is that in at least some cases . ,-~~~-, + * .this will be used with roll-your-own ASIC/FPGA .` \ / + * .logic in Verilog or VHDL. ~~~`````````..`````~~` \ / + * ..````````......``````````` \o_ + * | + * ^^ / \ + * + * ...`````~~`.....``.`..........``````.`.``.```........``. + * ` 8, 16, 32 and 64 bits registers are supported, and``. + * . the number of GPIOs is determined by the width of ~ + * .. the registers. ,............```.`.`..`.`.~~~.`.`.`~ + * `.......````.``` + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct bgpio_chip { + struct gpio_chip gc; + void __iomem *reg_dat; + void __iomem *reg_set; + void __iomem *reg_clr; + + /* Number of bits (GPIOs): * 8. */ + int bits; + + /* + * Some GPIO controllers work with the big-endian bits notation, + * e.g. in a 8-bits register, GPIO7 is the least significant bit. + */ + int big_endian_bits; + + /* + * Used to lock bgpio_chip->data. Also, this is needed to keep + * shadowed and real data registers writes together. + */ + spinlock_t lock; + + /* Shadowed data register to clear/set bits safely. */ + unsigned long data; +}; + +static struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct bgpio_chip, gc); +} + +static unsigned long bgpio_in(struct bgpio_chip *bgc) +{ + switch (bgc->bits) { + case 8: + return __raw_readb(bgc->reg_dat); + case 16: + return __raw_readw(bgc->reg_dat); + case 32: + return __raw_readl(bgc->reg_dat); +#if BITS_PER_LONG >= 64 + case 64: + return __raw_readq(bgc->reg_dat); +#endif + } + return -EINVAL; +} + +static void bgpio_out(struct bgpio_chip *bgc, void __iomem *reg, + unsigned long data) +{ + switch (bgc->bits) { + case 8: + __raw_writeb(data, reg); + return; + case 16: + __raw_writew(data, reg); + return; + case 32: + __raw_writel(data, reg); + return; +#if BITS_PER_LONG >= 64 + case 64: + __raw_writeq(data, reg); + return; +#endif + } +} + +static unsigned long bgpio_pin2mask(struct bgpio_chip *bgc, unsigned int pin) +{ + if (bgc->big_endian_bits) + return 1 << (bgc->bits - 1 - pin); + else + return 1 << pin; +} + +static int bgpio_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct bgpio_chip *bgc = to_bgpio_chip(gc); + + return bgpio_in(bgc) & bgpio_pin2mask(bgc, gpio); +} + +static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct bgpio_chip *bgc = to_bgpio_chip(gc); + unsigned long mask = bgpio_pin2mask(bgc, gpio); + unsigned long flags; + + if (bgc->reg_set) { + if (val) + bgpio_out(bgc, bgc->reg_set, mask); + else + bgpio_out(bgc, bgc->reg_clr, mask); + return; + } + + spin_lock_irqsave(&bgc->lock, flags); + + if (val) + bgc->data |= mask; + else + bgc->data &= ~mask; + + bgpio_out(bgc, bgc->reg_dat, bgc->data); + + spin_unlock_irqrestore(&bgc->lock, flags); +} + +static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + return 0; +} + +static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + bgpio_set(gc, gpio, val); + return 0; +} + +static int __devinit bgpio_probe(struct platform_device *pdev) +{ + const struct platform_device_id *platid = platform_get_device_id(pdev); + struct device *dev = &pdev->dev; + struct bgpio_pdata *pdata = dev_get_platdata(dev); + struct bgpio_chip *bgc; + struct resource *res_dat; + struct resource *res_set; + struct resource *res_clr; + resource_size_t dat_sz; + int bits; + int ret; + + res_dat = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dat"); + if (!res_dat) + return -EINVAL; + + dat_sz = resource_size(res_dat); + if (!is_power_of_2(dat_sz)) + return -EINVAL; + + bits = dat_sz * 8; + if (bits > BITS_PER_LONG) + return -EINVAL; + + bgc = devm_kzalloc(dev, sizeof(*bgc), GFP_KERNEL); + if (!bgc) + return -ENOMEM; + + bgc->reg_dat = devm_ioremap(dev, res_dat->start, dat_sz); + if (!bgc->reg_dat) + return -ENOMEM; + + res_set = platform_get_resource_byname(pdev, IORESOURCE_MEM, "set"); + res_clr = platform_get_resource_byname(pdev, IORESOURCE_MEM, "clr"); + if (res_set && res_clr) { + if (resource_size(res_set) != resource_size(res_clr) || + resource_size(res_set) != dat_sz) + return -EINVAL; + + bgc->reg_set = devm_ioremap(dev, res_set->start, dat_sz); + bgc->reg_clr = devm_ioremap(dev, res_clr->start, dat_sz); + if (!bgc->reg_set || !bgc->reg_clr) + return -ENOMEM; + } else if (res_set || res_clr) { + return -EINVAL; + } + + spin_lock_init(&bgc->lock); + + bgc->bits = bits; + bgc->big_endian_bits = !strcmp(platid->name, "basic-mmio-gpio-be"); + bgc->data = bgpio_in(bgc); + + bgc->gc.ngpio = bits; + bgc->gc.direction_input = bgpio_dir_in; + bgc->gc.direction_output = bgpio_dir_out; + bgc->gc.get = bgpio_get; + bgc->gc.set = bgpio_set; + bgc->gc.dev = dev; + bgc->gc.label = dev_name(dev); + + if (pdata) + bgc->gc.base = pdata->base; + else + bgc->gc.base = -1; + + dev_set_drvdata(dev, bgc); + + ret = gpiochip_add(&bgc->gc); + if (ret) + dev_err(dev, "gpiochip_add() failed: %d\n", ret); + + return ret; +} + +static int __devexit bgpio_remove(struct platform_device *pdev) +{ + struct bgpio_chip *bgc = dev_get_drvdata(&pdev->dev); + + return gpiochip_remove(&bgc->gc); +} + +static const struct platform_device_id bgpio_id_table[] = { + { "basic-mmio-gpio", }, + { "basic-mmio-gpio-be", }, + {}, +}; +MODULE_DEVICE_TABLE(platform, bgpio_id_table); + +static struct platform_driver bgpio_driver = { + .driver = { + .name = "basic-mmio-gpio", + }, + .id_table = bgpio_id_table, + .probe = bgpio_probe, + .remove = __devexit_p(bgpio_remove), +}; + +static int __init bgpio_init(void) +{ + return platform_driver_register(&bgpio_driver); +} +module_init(bgpio_init); + +static void __exit bgpio_exit(void) +{ + platform_driver_unregister(&bgpio_driver); +} +module_exit(bgpio_exit); + +MODULE_DESCRIPTION("Driver for basic memory-mapped GPIO controllers"); +MODULE_AUTHOR("Anton Vorontsov "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h new file mode 100644 index 000000000000..198087a16fc4 --- /dev/null +++ b/include/linux/basic_mmio_gpio.h @@ -0,0 +1,20 @@ +/* + * Basic memory-mapped GPIO controllers. + * + * Copyright 2008 MontaVista Software, Inc. + * Copyright 2008,2010 Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __BASIC_MMIO_GPIO_H +#define __BASIC_MMIO_GPIO_H + +struct bgpio_pdata { + int base; +}; + +#endif /* __BASIC_MMIO_GPIO_H */ From 09cd9527d621640d4dd231dff77b681e711d8e4b Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Oct 2010 15:33:16 -0700 Subject: [PATCH 020/139] gpiolib: fix HAVE_GPIO_LIB leftovers in asm-generic/gpio.h commit 7444a72effa632fcd8edc566f88 ("gpiolib: allow user-selection") removed HAVE_GPIO_LIB Kconfig symbol, but the header file still uses the name [to confuse readers wrt #ifdef/#else/#endif location]. The real Kconfig symbol nowadays is CONFIG_GPIOLIB. Signed-off-by: Anton Vorontsov Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/gpio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 8ca18e26d7e3..ff5c66080c8c 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -210,7 +210,7 @@ extern void gpio_unexport(unsigned gpio); #endif /* CONFIG_GPIO_SYSFS */ -#else /* !CONFIG_HAVE_GPIO_LIB */ +#else /* !CONFIG_GPIOLIB */ static inline int gpio_is_valid(int number) { @@ -239,7 +239,7 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) gpio_set_value(gpio, value); } -#endif /* !CONFIG_HAVE_GPIO_LIB */ +#endif /* !CONFIG_GPIOLIB */ #ifndef CONFIG_GPIO_SYSFS From 76d800a5b6e198c4fda07b88bb42a545709f193b Mon Sep 17 00:00:00 2001 From: Tomas Hallenberg Date: Wed, 27 Oct 2010 15:33:17 -0700 Subject: [PATCH 021/139] gpio: timbgpio: use a copy of the IER register to avoid it being trashed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some versions of the hardware can trash the IER register if simultaneous interrupts occur. This patch works around it by using a local copy of the register and restoring it after every interrupt. Signed-off-by: Tomas Hallenberg Acked-by: Richard Röjfors Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/timbgpio.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/timbgpio.c b/drivers/gpio/timbgpio.c index ddd053108a13..45293662e950 100644 --- a/drivers/gpio/timbgpio.c +++ b/drivers/gpio/timbgpio.c @@ -47,6 +47,7 @@ struct timbgpio { spinlock_t lock; /* mutual exclusion */ struct gpio_chip gpio; int irq_base; + unsigned long last_ier; }; static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index, @@ -112,16 +113,24 @@ static void timbgpio_irq_disable(unsigned irq) { struct timbgpio *tgpio = get_irq_chip_data(irq); int offset = irq - tgpio->irq_base; + unsigned long flags; - timbgpio_update_bit(&tgpio->gpio, offset, TGPIO_IER, 0); + spin_lock_irqsave(&tgpio->lock, flags); + tgpio->last_ier &= ~(1 << offset); + iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER); + spin_unlock_irqrestore(&tgpio->lock, flags); } static void timbgpio_irq_enable(unsigned irq) { struct timbgpio *tgpio = get_irq_chip_data(irq); int offset = irq - tgpio->irq_base; + unsigned long flags; - timbgpio_update_bit(&tgpio->gpio, offset, TGPIO_IER, 1); + spin_lock_irqsave(&tgpio->lock, flags); + tgpio->last_ier |= 1 << offset; + iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER); + spin_unlock_irqrestore(&tgpio->lock, flags); } static int timbgpio_irq_type(unsigned irq, unsigned trigger) @@ -194,8 +203,16 @@ static void timbgpio_irq(unsigned int irq, struct irq_desc *desc) ipr = ioread32(tgpio->membase + TGPIO_IPR); iowrite32(ipr, tgpio->membase + TGPIO_ICR); + /* + * Some versions of the hardware trash the IER register if more than + * one interrupt is received simultaneously. + */ + iowrite32(0, tgpio->membase + TGPIO_IER); + for_each_set_bit(offset, &ipr, tgpio->gpio.ngpio) generic_handle_irq(timbgpio_to_irq(&tgpio->gpio, offset)); + + iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER); } static struct irq_chip timbgpio_irqchip = { From ead6db084392349ad33323b1bb2916058dd7e82b Mon Sep 17 00:00:00 2001 From: Miguel Gaio Date: Wed, 27 Oct 2010 15:33:18 -0700 Subject: [PATCH 022/139] gpio: add support for 74x164 serial-in/parallel-out 8-bit shift register Add support for generic 74x164 serial-in/parallel-out 8-bits shift register. This driver can be used as a GPIO output expander. [akpm@linux-foundation.org: remove unused local `refresh'] Signed-off-by: Miguel Gaio Signed-off-by: Juhos Gabor Signed-off-by: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/74x164.c | 182 +++++++++++++++++++++++++++++++++++++ drivers/gpio/Kconfig | 8 ++ drivers/gpio/Makefile | 1 + include/linux/spi/74x164.h | 11 +++ 4 files changed, 202 insertions(+) create mode 100644 drivers/gpio/74x164.c create mode 100644 include/linux/spi/74x164.h diff --git a/drivers/gpio/74x164.c b/drivers/gpio/74x164.c new file mode 100644 index 000000000000..d91ff4c282e9 --- /dev/null +++ b/drivers/gpio/74x164.c @@ -0,0 +1,182 @@ +/* + * 74Hx164 - Generic serial-in/parallel-out 8-bits shift register GPIO driver + * + * Copyright (C) 2010 Gabor Juhos + * Copyright (C) 2010 Miguel Gaio + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#define GEN_74X164_GPIO_COUNT 8 + + +struct gen_74x164_chip { + struct spi_device *spi; + struct gpio_chip gpio_chip; + struct mutex lock; + u8 port_config; +}; + +static void gen_74x164_set_value(struct gpio_chip *, unsigned, int); + +static struct gen_74x164_chip *gpio_to_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct gen_74x164_chip, gpio_chip); +} + +static int __gen_74x164_write_config(struct gen_74x164_chip *chip) +{ + return spi_write(chip->spi, + &chip->port_config, sizeof(chip->port_config)); +} + +static int gen_74x164_direction_output(struct gpio_chip *gc, + unsigned offset, int val) +{ + gen_74x164_set_value(gc, offset, val); + return 0; +} + +static int gen_74x164_get_value(struct gpio_chip *gc, unsigned offset) +{ + struct gen_74x164_chip *chip = gpio_to_chip(gc); + int ret; + + mutex_lock(&chip->lock); + ret = (chip->port_config >> offset) & 0x1; + mutex_unlock(&chip->lock); + + return ret; +} + +static void gen_74x164_set_value(struct gpio_chip *gc, + unsigned offset, int val) +{ + struct gen_74x164_chip *chip = gpio_to_chip(gc); + + mutex_lock(&chip->lock); + if (val) + chip->port_config |= (1 << offset); + else + chip->port_config &= ~(1 << offset); + + __gen_74x164_write_config(chip); + mutex_unlock(&chip->lock); +} + +static int __devinit gen_74x164_probe(struct spi_device *spi) +{ + struct gen_74x164_chip *chip; + struct gen_74x164_chip_platform_data *pdata; + int ret; + + pdata = spi->dev.platform_data; + if (!pdata || !pdata->base) { + dev_dbg(&spi->dev, "incorrect or missing platform data\n"); + return -EINVAL; + } + + /* + * bits_per_word cannot be configured in platform data + */ + spi->bits_per_word = 8; + + ret = spi_setup(spi); + if (ret < 0) + return ret; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + mutex_init(&chip->lock); + + dev_set_drvdata(&spi->dev, chip); + + chip->spi = spi; + + chip->gpio_chip.label = GEN_74X164_DRIVER_NAME, + chip->gpio_chip.direction_output = gen_74x164_direction_output; + chip->gpio_chip.get = gen_74x164_get_value; + chip->gpio_chip.set = gen_74x164_set_value; + chip->gpio_chip.base = pdata->base; + chip->gpio_chip.ngpio = GEN_74X164_GPIO_COUNT; + chip->gpio_chip.can_sleep = 1; + chip->gpio_chip.dev = &spi->dev; + chip->gpio_chip.owner = THIS_MODULE; + + ret = __gen_74x164_write_config(chip); + if (ret) { + dev_err(&spi->dev, "Failed writing: %d\n", ret); + goto exit_destroy; + } + + ret = gpiochip_add(&chip->gpio_chip); + if (ret) + goto exit_destroy; + + return ret; + +exit_destroy: + dev_set_drvdata(&spi->dev, NULL); + mutex_destroy(&chip->lock); + kfree(chip); + return ret; +} + +static int gen_74x164_remove(struct spi_device *spi) +{ + struct gen_74x164_chip *chip; + int ret; + + chip = dev_get_drvdata(&spi->dev); + if (chip == NULL) + return -ENODEV; + + dev_set_drvdata(&spi->dev, NULL); + + ret = gpiochip_remove(&chip->gpio_chip); + if (!ret) { + mutex_destroy(&chip->lock); + kfree(chip); + } else + dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n", + ret); + + return ret; +} + +static struct spi_driver gen_74x164_driver = { + .driver = { + .name = GEN_74X164_DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = gen_74x164_probe, + .remove = __devexit_p(gen_74x164_remove), +}; + +static int __init gen_74x164_init(void) +{ + return spi_register_driver(&gen_74x164_driver); +} +subsys_initcall(gen_74x164_init); + +static void __exit gen_74x164_exit(void) +{ + spi_unregister_driver(&gen_74x164_driver); +} +module_exit(gen_74x164_exit); + +MODULE_AUTHOR("Gabor Juhos "); +MODULE_AUTHOR("Miguel Gaio "); +MODULE_DESCRIPTION("GPIO expander driver for 74X164 8-bits shift register"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index e47ef94be379..bc7b0fca6415 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -344,6 +344,14 @@ config GPIO_MC33880 SPI driver for Freescale MC33880 high-side/low-side switch. This provides GPIO interface supporting inputs and outputs. +config GPIO_74X164 + tristate "74x164 serial-in/parallel-out 8-bits shift register" + depends on SPI_MASTER + help + Platform driver for 74x164 compatible serial-in/parallel-out + 8-outputs shift registers. This driver can be used to provide access + to more gpio outputs. + comment "AC97 GPIO expanders:" config GPIO_UCB1400 diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 3ff0651fd173..0c23a4dd45e5 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_GPIO_MAX7301) += max7301.o obj-$(CONFIG_GPIO_MAX732X) += max732x.o obj-$(CONFIG_GPIO_MC33880) += mc33880.o obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o +obj-$(CONFIG_GPIO_74X164) += 74x164.o obj-$(CONFIG_GPIO_PCA953X) += pca953x.o obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o obj-$(CONFIG_GPIO_PL061) += pl061.o diff --git a/include/linux/spi/74x164.h b/include/linux/spi/74x164.h new file mode 100644 index 000000000000..d85c52f294a0 --- /dev/null +++ b/include/linux/spi/74x164.h @@ -0,0 +1,11 @@ +#ifndef LINUX_SPI_74X164_H +#define LINUX_SPI_74X164_H + +#define GEN_74X164_DRIVER_NAME "74x164" + +struct gen_74x164_chip_platform_data { + /* number assigned to the first GPIO */ + unsigned base; +}; + +#endif From 459773ae8dbbd480886d186181c6bc2e8556025f Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:19 -0700 Subject: [PATCH 023/139] gpio: adp5588-gpio: support interrupt controller Implement irq_chip functionality on ADP5588/5587 GPIO expanders. Only level sensitive interrupts are supported. Interrupts provided by this irq_chip must be requested using request_threaded_irq(). Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 7 + drivers/gpio/adp5588-gpio.c | 277 +++++++++++++++++++++++++++++++++--- include/linux/i2c/adp5588.h | 15 ++ 3 files changed, 278 insertions(+), 21 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index bc7b0fca6415..3f3181dac8d7 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -272,6 +272,13 @@ config GPIO_ADP5588 To compile this driver as a module, choose M here: the module will be called adp5588-gpio. +config GPIO_ADP5588_IRQ + bool "Interrupt controller support for ADP5588" + depends on GPIO_ADP5588=y + help + Say yes here to enable the adp5588 to be used as an interrupt + controller. It requires the driver to be built in the kernel. + comment "PCI GPIO expanders:" config GPIO_CS5535 diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c index 2e8e9e24f887..0871f78af593 100644 --- a/drivers/gpio/adp5588-gpio.c +++ b/drivers/gpio/adp5588-gpio.c @@ -1,8 +1,8 @@ /* * GPIO Chip driver for Analog Devices - * ADP5588 I/O Expander and QWERTY Keypad Controller + * ADP5588/ADP5587 I/O Expander and QWERTY Keypad Controller * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2010 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -13,21 +13,34 @@ #include #include #include +#include +#include #include -#define DRV_NAME "adp5588-gpio" -#define MAXGPIO 18 -#define ADP_BANK(offs) ((offs) >> 3) -#define ADP_BIT(offs) (1u << ((offs) & 0x7)) +#define DRV_NAME "adp5588-gpio" + +/* + * Early pre 4.0 Silicon required to delay readout by at least 25ms, + * since the Event Counter Register updated 25ms after the interrupt + * asserted. + */ +#define WA_DELAYED_READOUT_REVID(rev) ((rev) < 4) struct adp5588_gpio { struct i2c_client *client; struct gpio_chip gpio_chip; struct mutex lock; /* protect cached dir, dat_out */ + /* protect serialized access to the interrupt controller bus */ + struct mutex irq_lock; unsigned gpio_start; + unsigned irq_base; uint8_t dat_out[3]; uint8_t dir[3]; + uint8_t int_lvl[3]; + uint8_t int_en[3]; + uint8_t irq_mask[3]; + uint8_t irq_stat[3]; }; static int adp5588_gpio_read(struct i2c_client *client, u8 reg) @@ -55,8 +68,8 @@ static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off)) - & ADP_BIT(off)); + return !!(adp5588_gpio_read(dev->client, + GPIO_DAT_STAT1 + ADP5588_BANK(off)) & ADP5588_BIT(off)); } static void adp5588_gpio_set_value(struct gpio_chip *chip, @@ -66,8 +79,8 @@ static void adp5588_gpio_set_value(struct gpio_chip *chip, struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); - bit = ADP_BIT(off); + bank = ADP5588_BANK(off); + bit = ADP5588_BIT(off); mutex_lock(&dev->lock); if (val) @@ -87,10 +100,10 @@ static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off) struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); + bank = ADP5588_BANK(off); mutex_lock(&dev->lock); - dev->dir[bank] &= ~ADP_BIT(off); + dev->dir[bank] &= ~ADP5588_BIT(off); ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]); mutex_unlock(&dev->lock); @@ -105,8 +118,8 @@ static int adp5588_gpio_direction_output(struct gpio_chip *chip, struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); - bit = ADP_BIT(off); + bank = ADP5588_BANK(off); + bit = ADP5588_BIT(off); mutex_lock(&dev->lock); dev->dir[bank] |= bit; @@ -125,6 +138,213 @@ static int adp5588_gpio_direction_output(struct gpio_chip *chip, return ret; } +#ifdef CONFIG_GPIO_ADP5588_IRQ +static int adp5588_gpio_to_irq(struct gpio_chip *chip, unsigned off) +{ + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + return dev->irq_base + off; +} + +static void adp5588_irq_bus_lock(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + mutex_lock(&dev->irq_lock); +} + + /* + * genirq core code can issue chip->mask/unmask from atomic context. + * This doesn't work for slow busses where an access needs to sleep. + * bus_sync_unlock() is therefore called outside the atomic context, + * syncs the current irq mask state with the slow external controller + * and unlocks the bus. + */ + +static void adp5588_irq_bus_sync_unlock(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + int i; + + for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) + if (dev->int_en[i] ^ dev->irq_mask[i]) { + dev->int_en[i] = dev->irq_mask[i]; + adp5588_gpio_write(dev->client, GPIO_INT_EN1 + i, + dev->int_en[i]); + } + + mutex_unlock(&dev->irq_lock); +} + +static void adp5588_irq_mask(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + unsigned gpio = irq - dev->irq_base; + + dev->irq_mask[ADP5588_BANK(gpio)] &= ~ADP5588_BIT(gpio); +} + +static void adp5588_irq_unmask(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + unsigned gpio = irq - dev->irq_base; + + dev->irq_mask[ADP5588_BANK(gpio)] |= ADP5588_BIT(gpio); +} + +static int adp5588_irq_set_type(unsigned int irq, unsigned int type) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + uint16_t gpio = irq - dev->irq_base; + unsigned bank, bit; + + if ((type & IRQ_TYPE_EDGE_BOTH)) { + dev_err(&dev->client->dev, "irq %d: unsupported type %d\n", + irq, type); + return -EINVAL; + } + + bank = ADP5588_BANK(gpio); + bit = ADP5588_BIT(gpio); + + if (type & IRQ_TYPE_LEVEL_HIGH) + dev->int_lvl[bank] |= bit; + else if (type & IRQ_TYPE_LEVEL_LOW) + dev->int_lvl[bank] &= ~bit; + else + return -EINVAL; + + adp5588_gpio_direction_input(&dev->gpio_chip, gpio); + adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + bank, + dev->int_lvl[bank]); + + return 0; +} + +static struct irq_chip adp5588_irq_chip = { + .name = "adp5588", + .mask = adp5588_irq_mask, + .unmask = adp5588_irq_unmask, + .bus_lock = adp5588_irq_bus_lock, + .bus_sync_unlock = adp5588_irq_bus_sync_unlock, + .set_type = adp5588_irq_set_type, +}; + +static int adp5588_gpio_read_intstat(struct i2c_client *client, u8 *buf) +{ + int ret = i2c_smbus_read_i2c_block_data(client, GPIO_INT_STAT1, 3, buf); + + if (ret < 0) + dev_err(&client->dev, "Read INT_STAT Error\n"); + + return ret; +} + +static irqreturn_t adp5588_irq_handler(int irq, void *devid) +{ + struct adp5588_gpio *dev = devid; + unsigned status, bank, bit, pending; + int ret; + status = adp5588_gpio_read(dev->client, INT_STAT); + + if (status & ADP5588_GPI_INT) { + ret = adp5588_gpio_read_intstat(dev->client, dev->irq_stat); + if (ret < 0) + memset(dev->irq_stat, 0, ARRAY_SIZE(dev->irq_stat)); + + for (bank = 0; bank <= ADP5588_BANK(ADP5588_MAXGPIO); + bank++, bit = 0) { + pending = dev->irq_stat[bank] & dev->irq_mask[bank]; + + while (pending) { + if (pending & (1 << bit)) { + handle_nested_irq(dev->irq_base + + (bank << 3) + bit); + pending &= ~(1 << bit); + + } + bit++; + } + } + } + + adp5588_gpio_write(dev->client, INT_STAT, status); /* Status is W1C */ + + return IRQ_HANDLED; +} + +static int adp5588_irq_setup(struct adp5588_gpio *dev) +{ + struct i2c_client *client = dev->client; + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + unsigned gpio; + int ret; + + adp5588_gpio_write(client, CFG, ADP5588_AUTO_INC); + adp5588_gpio_write(client, INT_STAT, -1); /* status is W1C */ + adp5588_gpio_read_intstat(client, dev->irq_stat); /* read to clear */ + + dev->irq_base = pdata->irq_base; + mutex_init(&dev->irq_lock); + + for (gpio = 0; gpio < dev->gpio_chip.ngpio; gpio++) { + int irq = gpio + dev->irq_base; + set_irq_chip_data(irq, dev); + set_irq_chip_and_handler(irq, &adp5588_irq_chip, + handle_level_irq); + set_irq_nested_thread(irq, 1); +#ifdef CONFIG_ARM + /* + * ARM needs us to explicitly flag the IRQ as VALID, + * once we do so, it will also set the noprobe. + */ + set_irq_flags(irq, IRQF_VALID); +#else + set_irq_noprobe(irq); +#endif + } + + ret = request_threaded_irq(client->irq, + NULL, + adp5588_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + dev_name(&client->dev), dev); + if (ret) { + dev_err(&client->dev, "failed to request irq %d\n", + client->irq); + goto out; + } + + dev->gpio_chip.to_irq = adp5588_gpio_to_irq; + adp5588_gpio_write(client, CFG, + ADP5588_AUTO_INC | ADP5588_INT_CFG | ADP5588_GPI_INT); + + return 0; + +out: + dev->irq_base = 0; + return ret; +} + +static void adp5588_irq_teardown(struct adp5588_gpio *dev) +{ + if (dev->irq_base) + free_irq(dev->client->irq, dev); +} + +#else +static int adp5588_irq_setup(struct adp5588_gpio *dev) +{ + struct i2c_client *client = dev->client; + dev_warn(&client->dev, "interrupt support not compiled in\n"); + + return 0; +} + +static void adp5588_irq_teardown(struct adp5588_gpio *dev) +{ +} +#endif /* CONFIG_GPIO_ADP5588_IRQ */ + static int __devinit adp5588_gpio_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -160,37 +380,46 @@ static int __devinit adp5588_gpio_probe(struct i2c_client *client, gc->can_sleep = 1; gc->base = pdata->gpio_start; - gc->ngpio = MAXGPIO; + gc->ngpio = ADP5588_MAXGPIO; gc->label = client->name; gc->owner = THIS_MODULE; mutex_init(&dev->lock); - ret = adp5588_gpio_read(dev->client, DEV_ID); if (ret < 0) goto err; revid = ret & ADP5588_DEVICE_ID_MASK; - for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) { + for (i = 0, ret = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) { dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i); dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i); ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0); ret |= adp5588_gpio_write(client, GPIO_PULL1 + i, (pdata->pullup_dis_mask >> (8 * i)) & 0xFF); - + ret |= adp5588_gpio_write(client, GPIO_INT_EN1 + i, 0); if (ret) goto err; } + if (pdata->irq_base) { + if (WA_DELAYED_READOUT_REVID(revid)) { + dev_warn(&client->dev, "GPIO int not supported\n"); + } else { + ret = adp5588_irq_setup(dev); + if (ret) + goto err; + } + } + ret = gpiochip_add(&dev->gpio_chip); if (ret) - goto err; + goto err_irq; - dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n", + dev_info(&client->dev, "gpios %d..%d (IRQ Base %d) on a %s Rev. %d\n", gc->base, gc->base + gc->ngpio - 1, - client->name, revid); + pdata->irq_base, client->name, revid); if (pdata->setup) { ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context); @@ -199,8 +428,11 @@ static int __devinit adp5588_gpio_probe(struct i2c_client *client, } i2c_set_clientdata(client, dev); + return 0; +err_irq: + adp5588_irq_teardown(dev); err: kfree(dev); return ret; @@ -222,6 +454,9 @@ static int __devexit adp5588_gpio_remove(struct i2c_client *client) } } + if (dev->irq_base) + free_irq(dev->client->irq, dev); + ret = gpiochip_remove(&dev->gpio_chip); if (ret) { dev_err(&client->dev, "gpiochip_remove failed %d\n", ret); diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index 269181b8f623..531376b77773 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -74,6 +74,20 @@ #define ADP5588_DEVICE_ID_MASK 0xF + /* Configuration Register1 */ +#define ADP5588_AUTO_INC (1 << 7) +#define ADP5588_GPIEM_CFG (1 << 6) +#define ADP5588_INT_CFG (1 << 4) +#define ADP5588_GPI_IEN (1 << 1) + +/* Interrupt Status Register */ +#define ADP5588_GPI_INT (1 << 1) +#define ADP5588_KE_INT (1 << 0) + +#define ADP5588_MAXGPIO 18 +#define ADP5588_BANK(offs) ((offs) >> 3) +#define ADP5588_BIT(offs) (1u << ((offs) & 0x7)) + /* Put one of these structures in i2c_board_info platform_data */ #define ADP5588_KEYMAPSIZE 80 @@ -128,6 +142,7 @@ struct adp5588_kpad_platform_data { struct adp5588_gpio_platform_data { unsigned gpio_start; /* GPIO Chip base # */ + unsigned irq_base; /* interrupt base # */ unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ int (*setup)(struct i2c_client *client, int gpio, unsigned ngpio, From 9ef8c8c51a7d76bae73e0259c356b24533b6b7c0 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:20 -0700 Subject: [PATCH 024/139] gpio: adp5588-gpio: gpio_start must be signed Common code interprets this as a signed value (a negative value is used to request dynamic ID allocation), so make sure the platform data has proper types to support that. Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index 531376b77773..bec05ed21766 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -141,9 +141,9 @@ struct adp5588_kpad_platform_data { }; struct adp5588_gpio_platform_data { - unsigned gpio_start; /* GPIO Chip base # */ - unsigned irq_base; /* interrupt base # */ - unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int gpio_start; /* GPIO Chip base # */ + unsigned irq_base; /* interrupt base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ int (*setup)(struct i2c_client *client, int gpio, unsigned ngpio, void *context); From dc5ae4f2f58cfa98b67d2be379fc99080a8967af Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:20 -0700 Subject: [PATCH 025/139] gpio: adp5588-gpio: add i2c forward declaration Some ADP5588 functions take a pointer to an i2c_client, but if the i2c header doesn't happen to be included first, we hit the standard "struct declared inside parameter list" warnings from gcc. So add a simple forward decl of the i2c_client struct. Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index bec05ed21766..3c5d6b6e765c 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -140,6 +140,8 @@ struct adp5588_kpad_platform_data { const struct adp5588_gpio_platform_data *gpio_data; }; +struct i2c_client; /* forward declaration */ + struct adp5588_gpio_platform_data { int gpio_start; /* GPIO Chip base # */ unsigned irq_base; /* interrupt base # */ From 04c17aa89380addf8d7df6f0fd269fc2bd87796c Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Wed, 27 Oct 2010 15:33:21 -0700 Subject: [PATCH 026/139] gpio: add Topcliff PCH GPIO driver Topcliff PCH is the platform controller hub that is going to be used in Intel's upcoming general embedded platform. All IO peripherals in Topcliff PCH are actually devices sitting on AMBA bus. Topcliff PCH has GPIO I/F. Using this I/F, it is able to access system devices connected to GPIO. [akpm@linux-foundation.org: ese DEFINE_PCI_DEVICE_TABLE (per Joe Perches)] Signed-off-by: Tomoya MORINAGA Reviewed-by: Mark Brown Cc: Rabin Vincent Cc: Samuel Ortiz Cc: Linus Walleij Cc: Tomoya MORINAGA Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 8 ++ drivers/gpio/Makefile | 1 + drivers/gpio/pch_gpio.c | 312 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 321 insertions(+) create mode 100644 drivers/gpio/pch_gpio.c diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3f3181dac8d7..dd9b4ba8d32d 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -313,6 +313,14 @@ config GPIO_LANGWELL help Say Y here to support Intel Langwell/Penwell GPIO. +config GPIO_PCH + tristate "PCH GPIO of Intel Topcliff" + depends on PCI + help + This driver is for PCH(Platform controller Hub) GPIO of Intel Topcliff + which is an IOH(Input/Output Hub) for x86 embedded processor. + This driver can access PCH GPIO device. + config GPIO_TIMBERDALE bool "Support for timberdale GPIO IP" depends on MFD_TIMBERDALE && GPIOLIB && HAS_IOMEM diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 0c23a4dd45e5..da2ecde5abdd 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o obj-$(CONFIG_GPIO_74X164) += 74x164.o obj-$(CONFIG_GPIO_PCA953X) += pca953x.o obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o +obj-$(CONFIG_GPIO_PCH) += pch_gpio.o obj-$(CONFIG_GPIO_PL061) += pl061.o obj-$(CONFIG_GPIO_STMPE) += stmpe-gpio.o obj-$(CONFIG_GPIO_TC35892) += tc35892-gpio.o diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c new file mode 100644 index 000000000000..0eba0a75c804 --- /dev/null +++ b/drivers/gpio/pch_gpio.c @@ -0,0 +1,312 @@ +/* + * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + */ +#include +#include +#include + +#define PCH_GPIO_ALL_PINS 0xfff /* Mask for GPIO pins 0 to 11 */ +#define GPIO_NUM_PINS 12 /* Specifies number of GPIO PINS GPIO0-GPIO11 */ + +struct pch_regs { + u32 ien; + u32 istatus; + u32 idisp; + u32 iclr; + u32 imask; + u32 imaskclr; + u32 po; + u32 pi; + u32 pm; + u32 im0; + u32 im1; + u32 reserved[4]; + u32 reset; +}; + +/** + * struct pch_gpio_reg_data - The register store data. + * @po_reg: To store contents of PO register. + * @pm_reg: To store contents of PM register. + */ +struct pch_gpio_reg_data { + u32 po_reg; + u32 pm_reg; +}; + +/** + * struct pch_gpio - GPIO private data structure. + * @base: PCI base address of Memory mapped I/O register. + * @reg: Memory mapped PCH GPIO register list. + * @dev: Pointer to device structure. + * @gpio: Data for GPIO infrastructure. + * @pch_gpio_reg: Memory mapped Register data is saved here + * when suspend. + */ +struct pch_gpio { + void __iomem *base; + struct pch_regs __iomem *reg; + struct device *dev; + struct gpio_chip gpio; + struct pch_gpio_reg_data pch_gpio_reg; + struct mutex lock; +}; + +static void pch_gpio_set(struct gpio_chip *gpio, unsigned nr, int val) +{ + u32 reg_val; + struct pch_gpio *chip = container_of(gpio, struct pch_gpio, gpio); + + mutex_lock(&chip->lock); + reg_val = ioread32(&chip->reg->po); + if (val) + reg_val |= (1 << nr); + else + reg_val &= ~(1 << nr); + + iowrite32(reg_val, &chip->reg->po); + mutex_unlock(&chip->lock); +} + +static int pch_gpio_get(struct gpio_chip *gpio, unsigned nr) +{ + struct pch_gpio *chip = container_of(gpio, struct pch_gpio, gpio); + + return ioread32(&chip->reg->pi) & (1 << nr); +} + +static int pch_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, + int val) +{ + struct pch_gpio *chip = container_of(gpio, struct pch_gpio, gpio); + u32 pm; + u32 reg_val; + + mutex_lock(&chip->lock); + pm = ioread32(&chip->reg->pm) & PCH_GPIO_ALL_PINS; + pm |= (1 << nr); + iowrite32(pm, &chip->reg->pm); + + reg_val = ioread32(&chip->reg->po); + if (val) + reg_val |= (1 << nr); + else + reg_val &= ~(1 << nr); + + mutex_unlock(&chip->lock); + + return 0; +} + +static int pch_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) +{ + struct pch_gpio *chip = container_of(gpio, struct pch_gpio, gpio); + u32 pm; + + mutex_lock(&chip->lock); + pm = ioread32(&chip->reg->pm) & PCH_GPIO_ALL_PINS; /*bits 0-11*/ + pm &= ~(1 << nr); + iowrite32(pm, &chip->reg->pm); + mutex_unlock(&chip->lock); + + return 0; +} + +/* + * Save register configuration and disable interrupts. + */ +static void pch_gpio_save_reg_conf(struct pch_gpio *chip) +{ + chip->pch_gpio_reg.po_reg = ioread32(&chip->reg->po); + chip->pch_gpio_reg.pm_reg = ioread32(&chip->reg->pm); +} + +/* + * This function restores the register configuration of the GPIO device. + */ +static void pch_gpio_restore_reg_conf(struct pch_gpio *chip) +{ + /* to store contents of PO register */ + iowrite32(chip->pch_gpio_reg.po_reg, &chip->reg->po); + /* to store contents of PM register */ + iowrite32(chip->pch_gpio_reg.pm_reg, &chip->reg->pm); +} + +static void pch_gpio_setup(struct pch_gpio *chip) +{ + struct gpio_chip *gpio = &chip->gpio; + + gpio->label = dev_name(chip->dev); + gpio->owner = THIS_MODULE; + gpio->direction_input = pch_gpio_direction_input; + gpio->get = pch_gpio_get; + gpio->direction_output = pch_gpio_direction_output; + gpio->set = pch_gpio_set; + gpio->dbg_show = NULL; + gpio->base = -1; + gpio->ngpio = GPIO_NUM_PINS; + gpio->can_sleep = 0; +} + +static int __devinit pch_gpio_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + s32 ret; + struct pch_gpio *chip; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + chip->dev = &pdev->dev; + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "%s : pci_enable_device FAILED", __func__); + goto err_pci_enable; + } + + ret = pci_request_regions(pdev, KBUILD_MODNAME); + if (ret) { + dev_err(&pdev->dev, "pci_request_regions FAILED-%d", ret); + goto err_request_regions; + } + + chip->base = pci_iomap(pdev, 1, 0); + if (chip->base == 0) { + dev_err(&pdev->dev, "%s : pci_iomap FAILED", __func__); + ret = -ENOMEM; + goto err_iomap; + } + + chip->reg = chip->base; + pci_set_drvdata(pdev, chip); + mutex_init(&chip->lock); + pch_gpio_setup(chip); + ret = gpiochip_add(&chip->gpio); + if (ret) { + dev_err(&pdev->dev, "PCH gpio: Failed to register GPIO\n"); + goto err_gpiochip_add; + } + + return 0; + +err_gpiochip_add: + pci_iounmap(pdev, chip->base); + +err_iomap: + pci_release_regions(pdev); + +err_request_regions: + pci_disable_device(pdev); + +err_pci_enable: + kfree(chip); + dev_err(&pdev->dev, "%s Failed returns %d\n", __func__, ret); + return ret; +} + +static void __devexit pch_gpio_remove(struct pci_dev *pdev) +{ + int err; + struct pch_gpio *chip = pci_get_drvdata(pdev); + + err = gpiochip_remove(&chip->gpio); + if (err) + dev_err(&pdev->dev, "Failed gpiochip_remove\n"); + + pci_iounmap(pdev, chip->base); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(chip); +} + +#ifdef CONFIG_PM +static int pch_gpio_suspend(struct pci_dev *pdev, pm_message_t state) +{ + s32 ret; + struct pch_gpio *chip = pci_get_drvdata(pdev); + + pch_gpio_save_reg_conf(chip); + pch_gpio_restore_reg_conf(chip); + + ret = pci_save_state(pdev); + if (ret) { + dev_err(&pdev->dev, "pci_save_state Failed-%d\n", ret); + return ret; + } + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D0); + ret = pci_enable_wake(pdev, PCI_D0, 1); + if (ret) + dev_err(&pdev->dev, "pci_enable_wake Failed -%d\n", ret); + + return 0; +} + +static int pch_gpio_resume(struct pci_dev *pdev) +{ + s32 ret; + struct pch_gpio *chip = pci_get_drvdata(pdev); + + ret = pci_enable_wake(pdev, PCI_D0, 0); + + pci_set_power_state(pdev, PCI_D0); + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "pci_enable_device Failed-%d ", ret); + return ret; + } + pci_restore_state(pdev); + + iowrite32(0x01, &chip->reg->reset); + iowrite32(0x00, &chip->reg->reset); + pch_gpio_restore_reg_conf(chip); + + return 0; +} +#else +#define pch_gpio_suspend NULL +#define pch_gpio_resume NULL +#endif + +static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) }, + { 0, } +}; + +static struct pci_driver pch_gpio_driver = { + .name = "pch_gpio", + .id_table = pch_gpio_pcidev_id, + .probe = pch_gpio_probe, + .remove = __devexit_p(pch_gpio_remove), + .suspend = pch_gpio_suspend, + .resume = pch_gpio_resume +}; + +static int __init pch_gpio_pci_init(void) +{ + return pci_register_driver(&pch_gpio_driver); +} +module_init(pch_gpio_pci_init); + +static void __exit pch_gpio_pci_exit(void) +{ + pci_unregister_driver(&pch_gpio_driver); +} +module_exit(pch_gpio_pci_exit); + +MODULE_DESCRIPTION("PCH GPIO PCI Driver"); +MODULE_LICENSE("GPL"); From fd0574cb54bf1dd068e4603f0d67d237aa1d718d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Oct 2010 15:33:22 -0700 Subject: [PATCH 027/139] drivers/gpio/langwell_gpio.c: remove semicolons after function definitions Deweird this driver. Cc: Alek Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/langwell_gpio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/langwell_gpio.c b/drivers/gpio/langwell_gpio.c index 8383a8d7f994..222de1292e2b 100644 --- a/drivers/gpio/langwell_gpio.c +++ b/drivers/gpio/langwell_gpio.c @@ -158,15 +158,15 @@ static int lnw_irq_type(unsigned irq, unsigned type) spin_unlock_irqrestore(&lnw->lock, flags); return 0; -}; +} static void lnw_irq_unmask(unsigned irq) { -}; +} static void lnw_irq_mask(unsigned irq) { -}; +} static struct irq_chip lnw_irqchip = { .name = "LNW-GPIO", From 72b4379e9502e7bf64256af83a55f90bd13d1ce6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 27 Oct 2010 15:33:23 -0700 Subject: [PATCH 028/139] langwell_gpio: add support for whitney point In this case the logic is very similar but the IRQs are not exposed and the device is not picked up via PCI Based on a separate internal whitney point driver by Yin Kangkai. Signed-off-by: Alan Cox Cc: Yin Kangkai Cc: Alek Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/langwell_gpio.c | 83 +++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/langwell_gpio.c b/drivers/gpio/langwell_gpio.c index 222de1292e2b..64db9dc3a275 100644 --- a/drivers/gpio/langwell_gpio.c +++ b/drivers/gpio/langwell_gpio.c @@ -18,10 +18,12 @@ /* Supports: * Moorestown platform Langwell chip. * Medfield platform Penwell chip. + * Whitney point. */ #include #include +#include #include #include #include @@ -300,9 +302,88 @@ static struct pci_driver lnw_gpio_driver = { .probe = lnw_gpio_probe, }; + +static int __devinit wp_gpio_probe(struct platform_device *pdev) +{ + struct lnw_gpio *lnw; + struct gpio_chip *gc; + struct resource *rc; + int retval = 0; + + rc = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!rc) + return -EINVAL; + + lnw = kzalloc(sizeof(struct lnw_gpio), GFP_KERNEL); + if (!lnw) { + dev_err(&pdev->dev, + "can't allocate whitneypoint_gpio chip data\n"); + return -ENOMEM; + } + lnw->reg_base = ioremap_nocache(rc->start, resource_size(rc)); + if (lnw->reg_base == NULL) { + retval = -EINVAL; + goto err_kmalloc; + } + spin_lock_init(&lnw->lock); + gc = &lnw->chip; + gc->label = dev_name(&pdev->dev); + gc->owner = THIS_MODULE; + gc->direction_input = lnw_gpio_direction_input; + gc->direction_output = lnw_gpio_direction_output; + gc->get = lnw_gpio_get; + gc->set = lnw_gpio_set; + gc->to_irq = NULL; + gc->base = 0; + gc->ngpio = 64; + gc->can_sleep = 0; + retval = gpiochip_add(gc); + if (retval) { + dev_err(&pdev->dev, "whitneypoint gpiochip_add error %d\n", + retval); + goto err_ioremap; + } + platform_set_drvdata(pdev, lnw); + return 0; +err_ioremap: + iounmap(lnw->reg_base); +err_kmalloc: + kfree(lnw); + return retval; +} + +static int __devexit wp_gpio_remove(struct platform_device *pdev) +{ + struct lnw_gpio *lnw = platform_get_drvdata(pdev); + int err; + err = gpiochip_remove(&lnw->chip); + if (err) + dev_err(&pdev->dev, "failed to remove gpio_chip.\n"); + iounmap(lnw->reg_base); + kfree(lnw); + platform_set_drvdata(pdev, NULL); + return 0; +} + +static struct platform_driver wp_gpio_driver = { + .probe = wp_gpio_probe, + .remove = __devexit_p(wp_gpio_remove), + .driver = { + .name = "wp_gpio", + .owner = THIS_MODULE, + }, +}; + static int __init lnw_gpio_init(void) { - return pci_register_driver(&lnw_gpio_driver); + int ret; + ret = pci_register_driver(&lnw_gpio_driver); + if (ret < 0) + return ret; + ret = platform_driver_register(&wp_gpio_driver); + if (ret < 0) + pci_unregister_driver(&lnw_gpio_driver); + return ret; } device_initcall(lnw_gpio_init); From 09b599ddd467911ad6b43f3abe3c533446662417 Mon Sep 17 00:00:00 2001 From: Christian Dietrich Date: Wed, 27 Oct 2010 15:33:23 -0700 Subject: [PATCH 029/139] drivers/video/matrox/matroxfb_DAC1064.c: remove undead ifdef CONFIG_FB_MATROX_G The CONFIG_FB_MATROX_G ifdef isn't necessary at this point, because it is checked in an outer ifdef level already and has no effect here. Signed-off-by: Christian Dietrich Acked-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/matrox/matroxfb_DAC1064.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/video/matrox/matroxfb_DAC1064.c b/drivers/video/matrox/matroxfb_DAC1064.c index f9fa0fd00292..1717623aabc0 100644 --- a/drivers/video/matrox/matroxfb_DAC1064.c +++ b/drivers/video/matrox/matroxfb_DAC1064.c @@ -869,12 +869,9 @@ static int MGAG100_preinit(struct matrox_fb_info *minfo) minfo->capable.plnwt = minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG100 ? minfo->devflags.sgram : 1; -#ifdef CONFIG_FB_MATROX_G if (minfo->devflags.g450dac) { minfo->outputs[0].output = &g450out; - } else -#endif - { + } else { minfo->outputs[0].output = &m1064; } minfo->outputs[0].src = minfo->outputs[0].default_src; From f0a2f357d46a51f8066eb47b3dba40f87a680804 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Wed, 27 Oct 2010 15:33:25 -0700 Subject: [PATCH 030/139] savagefb: fix DDC for Savage 4 I tested savagefb on 3 different Savage 4 cards: Diamond Stealth III S520 Number Nine SR9 Datapath Horizon 2S (two savage chips on a PCI card) it worked except the DDC which did not work on any of them. Looking at the BIOS code, it does not use MMIO register 0xff20 but CRT register 0xa0 or 0xb1 - depending on the chip revision and something in register 0xa6. With this patch, DDC works fine on all 3 cards (even on the second head of Horizon 2S - although it does not display anything as it's misconfigured because of missing BIOS). Signed-off-by: Ondrej Zary Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/savage/savagefb-i2c.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/video/savage/savagefb-i2c.c b/drivers/video/savage/savagefb-i2c.c index ed371c868b3a..b16e6138fdd4 100644 --- a/drivers/video/savage/savagefb-i2c.c +++ b/drivers/video/savage/savagefb-i2c.c @@ -181,6 +181,15 @@ void savagefb_create_i2c_busses(struct fb_info *info) par->chan.algo.getscl = prosavage_gpio_getscl; break; case FB_ACCEL_SAVAGE4: + par->chan.reg = CR_SERIAL1; + if (par->pcidev->revision > 1 && !(VGArCR(0xa6, par) & 0x40)) + par->chan.reg = CR_SERIAL2; + par->chan.ioaddr = par->mmio.vbase; + par->chan.algo.setsda = prosavage_gpio_setsda; + par->chan.algo.setscl = prosavage_gpio_setscl; + par->chan.algo.getsda = prosavage_gpio_getsda; + par->chan.algo.getscl = prosavage_gpio_getscl; + break; case FB_ACCEL_SAVAGE2000: par->chan.reg = 0xff20; par->chan.ioaddr = par->mmio.vbase; From 847fb8aca3b79d03ef6c5f87f0843a30ed9e6627 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 27 Oct 2010 15:33:26 -0700 Subject: [PATCH 031/139] drivers/video/matrox/matroxfb_maven.c: fix unsigned return type The function has an unsigned return type, but returns a negative constant to indicate an error condition. The result of calling the function is only tested by comparison to 0, and thus unsigned is not needed and can be dropped from the return type. A sematic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @exists@ identifier f; constant C; @@ unsigned f(...) { <+... * return -C; ...+> } // Signed-off-by: Julia Lawall Cc: Petr Vandrovec Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/matrox/matroxfb_maven.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c index 1e3e8f19783e..31b8f67477b7 100644 --- a/drivers/video/matrox/matroxfb_maven.c +++ b/drivers/video/matrox/matroxfb_maven.c @@ -280,7 +280,7 @@ static int matroxfb_PLL_mavenclock(const struct matrox_pll_features2* pll, return fxtal * (*feed) / (*in) * ctl->den; } -static unsigned int matroxfb_mavenclock(const struct matrox_pll_ctl* ctl, +static int matroxfb_mavenclock(const struct matrox_pll_ctl *ctl, unsigned int htotal, unsigned int vtotal, unsigned int* in, unsigned int* feed, unsigned int* post, unsigned int* htotal2) { From c9c62dce35e7fc54beebafb071393a0008989e49 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Oct 2010 15:33:27 -0700 Subject: [PATCH 032/139] fbmem: fix whitespace Change a few lines of indentation to tabs. Signed-off-by: James Hogan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 42e303ff862a..a43442341ddd 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -877,13 +877,13 @@ fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var) if ((err = info->fbops->fb_pan_display(var, info))) return err; - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - if (var->vmode & FB_VMODE_YWRAP) - info->var.vmode |= FB_VMODE_YWRAP; - else - info->var.vmode &= ~FB_VMODE_YWRAP; - return 0; + info->var.xoffset = var->xoffset; + info->var.yoffset = var->yoffset; + if (var->vmode & FB_VMODE_YWRAP) + info->var.vmode |= FB_VMODE_YWRAP; + else + info->var.vmode &= ~FB_VMODE_YWRAP; + return 0; } static int fb_check_caps(struct fb_info *info, struct fb_var_screeninfo *var, From f11b478d461b7113eb4603b3914aaf15b7788e87 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Oct 2010 15:33:28 -0700 Subject: [PATCH 033/139] fbmem: fix fb_read, fb_write unaligned accesses fb_{read,write} access the framebuffer using lots of fb_{read,write}l's but don't check that the file position is aligned which can cause problems on some architectures which do not support unaligned accesses. Since the operations are essentially memcpy_{from,to}io, new fb_memcpy_{from,to}fb macros have been defined and these are used instead. For Sparc, fb_{read,write} macros use sbus_{read,write}, so this defines new sbus_memcpy_{from,to}io functions the same as memcpy_{from,to}io but using sbus_{read,write}b instead of {read,write}b. Signed-off-by: James Hogan Acked-by: David S. Miller Acked-by: Florian Tobias Schandinat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/io_32.h | 31 +++++++++++++++++++++++ arch/sparc/include/asm/io_64.h | 31 +++++++++++++++++++++++ drivers/video/fbmem.c | 46 +++++++++++----------------------- include/linux/fb.h | 6 +++++ 4 files changed, 82 insertions(+), 32 deletions(-) diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h index 2889574608db..c2ced21c9dc1 100644 --- a/arch/sparc/include/asm/io_32.h +++ b/arch/sparc/include/asm/io_32.h @@ -207,6 +207,21 @@ _memset_io(volatile void __iomem *dst, int c, __kernel_size_t n) #define memset_io(d,c,sz) _memset_io(d,c,sz) +static inline void +_sbus_memcpy_fromio(void *dst, const volatile void __iomem *src, + __kernel_size_t n) +{ + char *d = dst; + + while (n--) { + char tmp = sbus_readb(src); + *d++ = tmp; + src++; + } +} + +#define sbus_memcpy_fromio(d, s, sz) _sbus_memcpy_fromio(d, s, sz) + static inline void _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) { @@ -221,6 +236,22 @@ _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) #define memcpy_fromio(d,s,sz) _memcpy_fromio(d,s,sz) +static inline void +_sbus_memcpy_toio(volatile void __iomem *dst, const void *src, + __kernel_size_t n) +{ + const char *s = src; + volatile void __iomem *d = dst; + + while (n--) { + char tmp = *s++; + sbus_writeb(tmp, d); + d++; + } +} + +#define sbus_memcpy_toio(d, s, sz) _sbus_memcpy_toio(d, s, sz) + static inline void _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) { diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 9517d063c79c..9c8965415f0a 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -418,6 +418,21 @@ _memset_io(volatile void __iomem *dst, int c, __kernel_size_t n) #define memset_io(d,c,sz) _memset_io(d,c,sz) +static inline void +_sbus_memcpy_fromio(void *dst, const volatile void __iomem *src, + __kernel_size_t n) +{ + char *d = dst; + + while (n--) { + char tmp = sbus_readb(src); + *d++ = tmp; + src++; + } +} + +#define sbus_memcpy_fromio(d, s, sz) _sbus_memcpy_fromio(d, s, sz) + static inline void _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) { @@ -432,6 +447,22 @@ _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) #define memcpy_fromio(d,s,sz) _memcpy_fromio(d,s,sz) +static inline void +_sbus_memcpy_toio(volatile void __iomem *dst, const void *src, + __kernel_size_t n) +{ + const char *s = src; + volatile void __iomem *d = dst; + + while (n--) { + char tmp = *s++; + sbus_writeb(tmp, d); + d++; + } +} + +#define sbus_memcpy_toio(d, s, sz) _sbus_memcpy_toio(d, s, sz) + static inline void _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) { diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index a43442341ddd..0e6aa3d96a42 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -697,9 +697,9 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; - u32 *buffer, *dst; - u32 __iomem *src; - int c, i, cnt = 0, err = 0; + u8 *buffer, *dst; + u8 __iomem *src; + int c, cnt = 0, err = 0; unsigned long total_size; if (!info || ! info->screen_base) @@ -730,7 +730,7 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (!buffer) return -ENOMEM; - src = (u32 __iomem *) (info->screen_base + p); + src = (u8 __iomem *) (info->screen_base + p); if (info->fbops->fb_sync) info->fbops->fb_sync(info); @@ -738,17 +738,9 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) while (count) { c = (count > PAGE_SIZE) ? PAGE_SIZE : count; dst = buffer; - for (i = c >> 2; i--; ) - *dst++ = fb_readl(src++); - if (c & 3) { - u8 *dst8 = (u8 *) dst; - u8 __iomem *src8 = (u8 __iomem *) src; - - for (i = c & 3; i--;) - *dst8++ = fb_readb(src8++); - - src = (u32 __iomem *) src8; - } + fb_memcpy_fromfb(dst, src, c); + dst += c; + src += c; if (copy_to_user(buf, buffer, c)) { err = -EFAULT; @@ -772,9 +764,9 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; - u32 *buffer, *src; - u32 __iomem *dst; - int c, i, cnt = 0, err = 0; + u8 *buffer, *src; + u8 __iomem *dst; + int c, cnt = 0, err = 0; unsigned long total_size; if (!info || !info->screen_base) @@ -811,7 +803,7 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) if (!buffer) return -ENOMEM; - dst = (u32 __iomem *) (info->screen_base + p); + dst = (u8 __iomem *) (info->screen_base + p); if (info->fbops->fb_sync) info->fbops->fb_sync(info); @@ -825,19 +817,9 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) break; } - for (i = c >> 2; i--; ) - fb_writel(*src++, dst++); - - if (c & 3) { - u8 *src8 = (u8 *) src; - u8 __iomem *dst8 = (u8 __iomem *) dst; - - for (i = c & 3; i--; ) - fb_writeb(*src8++, dst8++); - - dst = (u32 __iomem *) dst8; - } - + fb_memcpy_tofb(dst, src, c); + dst += c; + src += c; *ppos += c; buf += c; cnt += c; diff --git a/include/linux/fb.h b/include/linux/fb.h index f0268deca658..7fca3dc4e475 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -931,6 +931,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel sbus_writel #define fb_writeq sbus_writeq #define fb_memset sbus_memset_io +#define fb_memcpy_fromfb sbus_memcpy_fromio +#define fb_memcpy_tofb sbus_memcpy_toio #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) @@ -943,6 +945,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel __raw_writel #define fb_writeq __raw_writeq #define fb_memset memset_io +#define fb_memcpy_fromfb memcpy_fromio +#define fb_memcpy_tofb memcpy_toio #else @@ -955,6 +959,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel(b,addr) (*(volatile u32 *) (addr) = (b)) #define fb_writeq(b,addr) (*(volatile u64 *) (addr) = (b)) #define fb_memset memset +#define fb_memcpy_fromfb memcpy +#define fb_memcpy_tofb memcpy #endif From f35691062a138484b51bf53b36ae8a4495d8fb91 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 27 Oct 2010 15:33:28 -0700 Subject: [PATCH 034/139] drivers/video/gbefb.c: eliminate memory leak This code is preceded by a call to framebuffer_alloc, which allocates memory, so this memory should be freed before leaving the function in an error case. out_release_framebuffer just frees the frame buffer and returns. A simplified version of the semantic match that finds this problem is: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression x; expression E; identifier f1; iterator I; @@ x = framebuffer_alloc(...); <... when != x when != true (x == NULL || ...) when != if (...) { <+...x...+> } when != I (...) { <+...x...+> } ( x == NULL | x == E | x->f1 ) ...> * return ...; // Signed-off-by: Julia Lawall Cc: Ralf Baechle Cc: Arnaud Patard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/gbefb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index ca3355e430bf..933899dca33a 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -1143,8 +1143,10 @@ static int __devinit gbefb_probe(struct platform_device *p_dev) return -ENOMEM; #ifndef MODULE - if (fb_get_options("gbefb", &options)) - return -ENODEV; + if (fb_get_options("gbefb", &options)) { + ret = -ENODEV; + goto out_release_framebuffer; + } gbefb_setup(options); #endif From b4754ebc319ca2e8eb95e51a5d2e2d00047c4db9 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Wed, 27 Oct 2010 15:33:29 -0700 Subject: [PATCH 035/139] drivers/video/omap/blizzard.c: suspected typo in assignment Untested, but looks like an obvious typo to me. Signed-off-by: Nicolas Kaiser Cc: Tomi Valkeinen Cc: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/omap/blizzard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/omap/blizzard.c b/drivers/video/omap/blizzard.c index 2ffb34af4c59..87785c215a52 100644 --- a/drivers/video/omap/blizzard.c +++ b/drivers/video/omap/blizzard.c @@ -1590,7 +1590,7 @@ static int blizzard_init(struct omapfb_device *fbdev, int ext_mode, blizzard.auto_update_window.width = fbdev->panel->x_res; blizzard.auto_update_window.height = fbdev->panel->y_res; blizzard.auto_update_window.out_x = 0; - blizzard.auto_update_window.out_x = 0; + blizzard.auto_update_window.out_y = 0; blizzard.auto_update_window.out_width = fbdev->panel->x_res; blizzard.auto_update_window.out_height = fbdev->panel->y_res; blizzard.auto_update_window.format = 0; From e45c9effed903ba3fdbd6ef0498ee8989c35af0a Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Wed, 27 Oct 2010 15:33:30 -0700 Subject: [PATCH 036/139] isofs: work-around for Rock Ridge+Joliet CDs with empty ISO root directory If a CD has both Rock Ridge and Joliet extensions and the ISO root directory is empty, no files are visible. Disable Rock Ridge extensions in this case and use Joliet root directory instead. Signed-off-by: Ondrej Zary Cc: Al Viro Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/isofs/inode.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 60c2b944d762..79cf7f616bbe 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -543,6 +543,34 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) return vol_desc_start; } +/* + * Check if root directory is empty (has less than 3 files). + * + * Used to detect broken CDs where ISO root directory is empty but Joliet root + * directory is OK. If such CD has Rock Ridge extensions, they will be disabled + * (and Joliet used instead) or else no files would be visible. + */ +static bool rootdir_empty(struct super_block *sb, unsigned long block) +{ + int offset = 0, files = 0, de_len; + struct iso_directory_record *de; + struct buffer_head *bh; + + bh = sb_bread(sb, block); + if (!bh) + return true; + while (files < 3) { + de = (struct iso_directory_record *) (bh->b_data + offset); + de_len = *(unsigned char *) de; + if (de_len == 0) + break; + files++; + offset += de_len; + } + brelse(bh); + return files < 3; +} + /* * Initialize the superblock and read the root inode. * @@ -842,6 +870,18 @@ root_found: if (IS_ERR(inode)) goto out_no_root; + /* + * Fix for broken CDs with Rock Ridge and empty ISO root directory but + * correct Joliet root directory. + */ + if (sbi->s_rock == 1 && joliet_level && + rootdir_empty(s, sbi->s_firstdatazone)) { + printk(KERN_NOTICE + "ISOFS: primary root directory is empty. " + "Disabling Rock Ridge and switching to Joliet."); + sbi->s_rock = 0; + } + /* * If this disk has both Rock Ridge and Joliet on it, then we * want to use Rock Ridge by default. This can be overridden From abffc0207f12563f17bbde96e4cc0d9f3d7e2a53 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 27 Oct 2010 15:33:31 -0700 Subject: [PATCH 037/139] doc: clarify the behaviour of dirty_ratio/dirty_bytes When dirty_ratio or dirty_bytes is written the other parameter is disabled and set to 0 (in dirty_bytes_handler() / dirty_ratio_handler()). We do the same for dirty_background_ratio and dirty_background_bytes. However, in the sysctl documentation, we say that the counterpart becomes a function of the old value, that is not correct. Clarify the documentation reporting the actual behaviour. Reviewed-by: Greg Thelen Acked-by: David Rientjes Signed-off-by: Andrea Righi Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b606c2c4dd37..30289fab86eb 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -80,8 +80,10 @@ dirty_background_bytes Contains the amount of dirty memory at which the pdflush background writeback daemon will start writeback. -If dirty_background_bytes is written, dirty_background_ratio becomes a function -of its value (dirty_background_bytes / the amount of dirtyable system memory). +Note: dirty_background_bytes is the counterpart of dirty_background_ratio. Only +one of them may be specified at a time. When one sysctl is written it is +immediately taken into account to evaluate the dirty memory limits and the +other appears as 0 when read. ============================================================== @@ -97,8 +99,10 @@ dirty_bytes Contains the amount of dirty memory at which a process generating disk writes will itself start writeback. -If dirty_bytes is written, dirty_ratio becomes a function of its value -(dirty_bytes / the amount of dirtyable system memory). +Note: dirty_bytes is the counterpart of dirty_ratio. Only one of them may be +specified at a time. When one sysctl is written it is immediately taken into +account to evaluate the dirty memory limits and the other appears as 0 when +read. Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any value lower than this limit will be ignored and the old configuration will be From d5de4ddb1bc430289bede76c0d87cabee93f749a Mon Sep 17 00:00:00 2001 From: Tomasz Buchert Date: Wed, 27 Oct 2010 15:33:32 -0700 Subject: [PATCH 038/139] cgroup_freezer: unnecessary test in cgroup_freezing_or_frozen() The root freezer_state is always CGROUP_THAWED so we can remove the special case from the code. The test itself can be handy and is extracted to static function. Signed-off-by: Tomasz Buchert Cc: Matt Helsley Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index ce71ed53e88f..321e2a007d25 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -48,20 +48,19 @@ static inline struct freezer *task_freezer(struct task_struct *task) struct freezer, css); } +static inline int __cgroup_freezing_or_frozen(struct task_struct *task) +{ + enum freezer_state state = task_freezer(task)->state; + return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); +} + int cgroup_freezing_or_frozen(struct task_struct *task) { - struct freezer *freezer; - enum freezer_state state; - + int result; task_lock(task); - freezer = task_freezer(task); - if (!freezer->css.cgroup->parent) - state = CGROUP_THAWED; /* root cgroup can't be frozen */ - else - state = freezer->state; + result = __cgroup_freezing_or_frozen(task); task_unlock(task); - - return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); + return result; } /* From 0bdba580ab052a21e3eda2764ed22d9ee962392b Mon Sep 17 00:00:00 2001 From: Tomasz Buchert Date: Wed, 27 Oct 2010 15:33:33 -0700 Subject: [PATCH 039/139] cgroup_freezer: fix can_attach() to prohibit moving from/to freezing/frozen cgroups It is possible to move a task from its cgroup even if this group is 'FREEZING'. This results in a nasty bug - the moved task will become frozen OUTSIDE its original cgroup and will remain in a permanent 'D' state. This patch allows to migrate the task only between THAWED cgroups. This behavior was observed and easily reproduced on a single core laptop. Notice that reproducibility depends highly on the machine used. Program and instructions how to reproduce the bug can be fetched from: http://pentium.hopto.org/~thinred/repos/linux-misc/freezer_bug.c Signed-off-by: Tomasz Buchert Cc: Matt Helsley Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 321e2a007d25..c287627bed3d 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -173,24 +173,25 @@ static int freezer_can_attach(struct cgroup_subsys *ss, /* * Anything frozen can't move or be moved to/from. - * - * Since orig_freezer->state == FROZEN means that @task has been - * frozen, so it's sufficient to check the latter condition. */ - if (is_task_frozen_enough(task)) + freezer = cgroup_freezer(new_cgroup); + if (freezer->state != CGROUP_THAWED) return -EBUSY; - freezer = cgroup_freezer(new_cgroup); - if (freezer->state == CGROUP_FROZEN) + rcu_read_lock(); + if (__cgroup_freezing_or_frozen(task)) { + rcu_read_unlock(); return -EBUSY; + } + rcu_read_unlock(); if (threadgroup) { struct task_struct *c; rcu_read_lock(); list_for_each_entry_rcu(c, &task->thread_group, thread_group) { - if (is_task_frozen_enough(c)) { + if (__cgroup_freezing_or_frozen(c)) { rcu_read_unlock(); return -EBUSY; } From 2d3cbf8bc852ac1bc3d098186143c5973f87b753 Mon Sep 17 00:00:00 2001 From: Tomasz Buchert Date: Wed, 27 Oct 2010 15:33:34 -0700 Subject: [PATCH 040/139] cgroup_freezer: update_freezer_state() does incorrect state transitions There are 4 state transitions possible for a freezer. Only FREEZING -> FROZEN transaction is done lazily. This patch allows update_freezer_state only to perform this transaction and renames the function to update_if_frozen. Moreover is_task_frozen_enough function is removed and its every occurence is replaced with frozen(). Therefore for a group to become FROZEN every task must be frozen. The previous version could trigger a following bug: When cgroup is in the process of freezing (but none of its tasks are frozen yet), update_freezer_state() (called from freezer_read or freezer_write) would incorrectly report that a group is 'THAWED' (because nfrozen = 0), allowing the transaction FREEZING -> THAWED without writing anything to 'freezer.state'. This is incorrect according to the documentation. This could result in a 'THAWED' cgroup with frozen tasks inside. A code to reproduce this bug is available here: http://pentium.hopto.org/~thinred/repos/linux-misc/freezer_bug2.c [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Tomasz Buchert Cc: Matt Helsley Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index c287627bed3d..e7bebb7c6c38 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -153,13 +153,6 @@ static void freezer_destroy(struct cgroup_subsys *ss, kfree(cgroup_freezer(cgroup)); } -/* Task is frozen or will freeze immediately when next it gets woken */ -static bool is_task_frozen_enough(struct task_struct *task) -{ - return frozen(task) || - (task_is_stopped_or_traced(task) && freezing(task)); -} - /* * The call to cgroup_lock() in the freezer.state write method prevents * a write to that file racing against an attach, and hence the @@ -236,31 +229,30 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) /* * caller must hold freezer->lock */ -static void update_freezer_state(struct cgroup *cgroup, +static void update_if_frozen(struct cgroup *cgroup, struct freezer *freezer) { struct cgroup_iter it; struct task_struct *task; unsigned int nfrozen = 0, ntotal = 0; + enum freezer_state old_state = freezer->state; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { ntotal++; - if (is_task_frozen_enough(task)) + if (frozen(task)) nfrozen++; } - /* - * Transition to FROZEN when no new tasks can be added ensures - * that we never exist in the FROZEN state while there are unfrozen - * tasks. - */ - if (nfrozen == ntotal) - freezer->state = CGROUP_FROZEN; - else if (nfrozen > 0) - freezer->state = CGROUP_FREEZING; - else - freezer->state = CGROUP_THAWED; + if (old_state == CGROUP_THAWED) { + BUG_ON(nfrozen > 0); + } else if (old_state == CGROUP_FREEZING) { + if (nfrozen == ntotal) + freezer->state = CGROUP_FROZEN; + } else { /* old_state == CGROUP_FROZEN */ + BUG_ON(nfrozen != ntotal); + } + cgroup_iter_end(cgroup, &it); } @@ -279,7 +271,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft, if (state == CGROUP_FREEZING) { /* We change from FREEZING to FROZEN lazily if the cgroup was * only partially frozen when we exitted write. */ - update_freezer_state(cgroup, freezer); + update_if_frozen(cgroup, freezer); state = freezer->state; } spin_unlock_irq(&freezer->lock); @@ -301,7 +293,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) while ((task = cgroup_iter_next(cgroup, &it))) { if (!freeze_task(task, true)) continue; - if (is_task_frozen_enough(task)) + if (frozen(task)) continue; if (!freezing(task) && !freezer_should_skip(task)) num_cant_freeze_now++; @@ -335,7 +327,7 @@ static int freezer_change_state(struct cgroup *cgroup, spin_lock_irq(&freezer->lock); - update_freezer_state(cgroup, freezer); + update_if_frozen(cgroup, freezer); if (goal_state == freezer->state) goto out; From 97978e6d1f2da0073416870410459694fbdbfd9b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:35 -0700 Subject: [PATCH 041/139] cgroup: add clone_children control file The ns_cgroup is a control group interacting with the namespaces. When a new namespace is created, a corresponding cgroup is automatically created too. The cgroup name is the pid of the process who did 'unshare' or the child of 'clone'. This cgroup is tied with the namespace because it prevents a process to escape the control group and use the post_clone callback, so the child cgroup inherits the values of the parent cgroup. Unfortunately, the more we use this cgroup and the more we are facing problems with it: (1) when a process unshares, the cgroup name may conflict with a previous cgroup with the same pid, so unshare or clone return -EEXIST (2) the cgroup creation is out of control because there may have an application creating several namespaces where the system will automatically create several cgroups in his back and let them on the cgroupfs (eg. a vrf based on the network namespace). (3) the mix of (1) and (2) force an administrator to regularly check and clean these cgroups. This patchset removes the ns_cgroup by adding a new flag to the cgroup and the cgroupfs mount option. It enables the copy of the parent cgroup when a child cgroup is created. We can then safely remove the ns_cgroup as this flag brings a compatibility. We have now to manually create and add the task to a cgroup, which is consistent with the cgroup framework. This patch: Sent as an answer to a previous thread around the ns_cgroup. https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html It adds a control file 'clone_children' for a cgroup. This control file is a boolean specifying if the child cgroup should be a clone of the parent cgroup or not. The default value is 'false'. This flag makes the child cgroup to call the post_clone callback of all the subsystem, if it is available. At present, the cpuset is the only one which had implemented the post_clone callback. The option can be set at mount time by specifying the 'clone_children' mount option. Signed-off-by: Daniel Lezcano Signed-off-by: Serge E. Hallyn Cc: Eric W. Biederman Acked-by: Paul Menage Reviewed-by: Li Zefan Cc: Jamal Hadi Salim Cc: Matt Helsley Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 14 +++++++++-- include/linux/cgroup.h | 4 ++++ kernel/cgroup.c | 39 +++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index b34823ff1646..190018b0c649 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -18,7 +18,8 @@ CONTENTS: 1.2 Why are cgroups needed ? 1.3 How are cgroups implemented ? 1.4 What does notify_on_release do ? - 1.5 How do I use cgroups ? + 1.5 What does clone_children do ? + 1.6 How do I use cgroups ? 2. Usage Examples and Syntax 2.1 Basic Usage 2.2 Attaching processes @@ -293,7 +294,16 @@ notify_on_release in the root cgroup at system boot is disabled value of their parents notify_on_release setting. The default value of a cgroup hierarchy's release_agent path is empty. -1.5 How do I use cgroups ? +1.5 What does clone_children do ? +--------------------------------- + +If the clone_children flag is enabled (1) in a cgroup, then all +cgroups created beneath will call the post_clone callbacks for each +subsystem of the newly created cgroup. Usually when this callback is +implemented for a subsystem, it copies the values of the parent +subsystem, this is the case for the cpuset. + +1.6 How do I use cgroups ? -------------------------- To start a new job that is to be contained within a cgroup, using diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 709dfb901d11..ed4ba111bc8d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -154,6 +154,10 @@ enum { * A thread in rmdir() is wating for this cgroup. */ CGRP_WAIT_ON_RMDIR, + /* + * Clone cgroup values when creating a new child cgroup + */ + CGRP_CLONE_CHILDREN, }; /* which pidlist file are we talking about? */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9270d532ec3c..4b218a46ddd3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } +static int clone_children(const struct cgroup *cgrp) +{ + return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); +} + /* * for_each_subsys() allows you to iterate on each subsystem attached to * an active hierarchy @@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (clone_children(&root->top_cgroup)) + seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); @@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + bool clone_children; char *name; /* User explicitly requested empty subsystem */ bool none; @@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) opts->none = true; } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); + } else if (!strcmp(token, "clone_children")) { + opts->clone_children = true; } else if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) @@ -1355,6 +1365,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); + if (opts->clone_children) + set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); return root; } @@ -3173,6 +3185,23 @@ fail: return ret; } +static u64 cgroup_clone_children_read(struct cgroup *cgrp, + struct cftype *cft) +{ + return clone_children(cgrp); +} + +static int cgroup_clone_children_write(struct cgroup *cgrp, + struct cftype *cft, + u64 val) +{ + if (val) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + else + clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + return 0; +} + /* * for the common functions, 'private' gives the type of file */ @@ -3203,6 +3232,11 @@ static struct cftype files[] = { .write_string = cgroup_write_event_control, .mode = S_IWUGO, }, + { + .name = "cgroup.clone_children", + .read_u64 = cgroup_clone_children_read, + .write_u64 = cgroup_clone_children_write, + }, }; static struct cftype cft_release_agent = { @@ -3332,6 +3366,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + if (clone_children(parent)) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); @@ -3346,6 +3383,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_destroy; } /* At error, ->destroy() callback has to free assigned ID. */ + if (clone_children(parent) && ss->post_clone) + ss->post_clone(ss, cgrp); } cgroup_lock_hierarchy(root); From 32a8cf235e2f192eb002755076994525cdbaa35a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:37 -0700 Subject: [PATCH 042/139] cgroup: make the mount options parsing more accurate Current behavior: ================= (1) When we mount a cgroup, we can specify the 'all' option which means to enable all the cgroup subsystems. This is the default option when no option is specified. (2) If we want to mount a cgroup with a subset of the supported cgroup subsystems, we have to specify a subsystems name list for the mount option. (3) If we specify another option like 'noprefix' or 'release_agent', the actual code wants the 'all' or a subsystem name option specified also. Not critical but a bit not friendly as we should assume (1) in this case. (4) Logically, the 'all' option is mutually exclusive with a subsystem name, but this is not detected. In other words: succeed : mount -t cgroup -o all,freezer cgroup /cgroup => is it 'all' or 'freezer' ? fails : mount -t cgroup -o noprefix cgroup /cgroup => succeed if we do '-o noprefix,all' The following patches consolidate a bit the mount options check. New behavior: ============= (1) untouched (2) untouched (3) the 'all' option will be by default when specifying other than a subsystem name option (4) raises an error In other words: fails : mount -t cgroup -o all,freezer cgroup /cgroup succeed : mount -t cgroup -o noprefix cgroup /cgroup For the sake of lisibility, the if ... then ... else ... if ... indentation when parsing the options has been changed to: if ... then ... continue fi Signed-off-by: Daniel Lezcano Signed-off-by: Serge E. Hallyn Reviewed-by: Li Zefan Reviewed-by: Paul Menage Cc: Eric W. Biederman Cc: Jamal Hadi Salim Cc: Matt Helsley Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 90 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4b218a46ddd3..3e6517e51fd3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1074,7 +1074,8 @@ struct cgroup_sb_opts { */ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { - char *token, *o = data ?: "all"; + char *token, *o = data; + bool all_ss = false, one_ss = false; unsigned long mask = (unsigned long)-1; int i; bool module_pin_failed = false; @@ -1090,24 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) while ((token = strsep(&o, ",")) != NULL) { if (!*token) return -EINVAL; - if (!strcmp(token, "all")) { - /* Add all non-disabled subsystems */ - opts->subsys_bits = 0; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup_subsys *ss = subsys[i]; - if (ss == NULL) - continue; - if (!ss->disabled) - opts->subsys_bits |= 1ul << i; - } - } else if (!strcmp(token, "none")) { + if (!strcmp(token, "none")) { /* Explicitly have no subsystems */ opts->none = true; - } else if (!strcmp(token, "noprefix")) { + continue; + } + if (!strcmp(token, "all")) { + /* Mutually exclusive option 'all' + subsystem name */ + if (one_ss) + return -EINVAL; + all_ss = true; + continue; + } + if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); - } else if (!strcmp(token, "clone_children")) { + continue; + } + if (!strcmp(token, "clone_children")) { opts->clone_children = true; - } else if (!strncmp(token, "release_agent=", 14)) { + continue; + } + if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) return -EINVAL; @@ -1115,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; - } else if (!strncmp(token, "name=", 5)) { + continue; + } + if (!strncmp(token, "name=", 5)) { const char *name = token + 5; /* Can't specify an empty name */ if (!strlen(name)) @@ -1137,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) GFP_KERNEL); if (!opts->name) return -ENOMEM; - } else { - struct cgroup_subsys *ss; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - ss = subsys[i]; - if (ss == NULL) - continue; - if (!strcmp(token, ss->name)) { - if (!ss->disabled) - set_bit(i, &opts->subsys_bits); - break; - } - } - if (i == CGROUP_SUBSYS_COUNT) - return -ENOENT; + + continue; + } + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (strcmp(token, ss->name)) + continue; + if (ss->disabled) + continue; + + /* Mutually exclusive option 'all' + subsystem name */ + if (all_ss) + return -EINVAL; + set_bit(i, &opts->subsys_bits); + one_ss = true; + + break; + } + if (i == CGROUP_SUBSYS_COUNT) + return -ENOENT; + } + + /* + * If the 'all' option was specified select all the subsystems, + * otherwise 'all, 'none' and a subsystem name options were not + * specified, let's default to 'all' + */ + if (all_ss || (!all_ss && !one_ss && !opts->none)) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (ss->disabled) + continue; + set_bit(i, &opts->subsys_bits); } } From f4a2589feaef0a9b737a3e582b37ee96695bb25f Mon Sep 17 00:00:00 2001 From: Evgeny Kuznetsov Date: Wed, 27 Oct 2010 15:33:37 -0700 Subject: [PATCH 043/139] cgroups: add check for strcpy destination string overflow Function "strcpy" is used without check for maximum allowed source string length and could cause destination string overflow. Check for string length is added before using "strcpy". Function now is return error if source string length is more than a maximum. akpm: presently considered NotABug, but add the check for general future-safeness and robustness. Signed-off-by: Evgeny Kuznetsov Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3e6517e51fd3..5cf366965d0c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1922,6 +1922,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + if (strlen(buffer) >= PATH_MAX) + return -EINVAL; if (!cgroup_lock_live_group(cgrp)) return -ENODEV; strcpy(cgrp->root->release_agent_path, buffer); From 45531757b45cae0ce64c5aff08c2534d5a0fa3e7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:38 -0700 Subject: [PATCH 044/139] cgroup: notify ns_cgroup deprecated The ns_cgroup will be removed very soon. Let's warn, for this version, ns_cgroup is deprecated. Make ns_cgroup and clone_children exclusive. If the clone_children is set and the ns_cgroup is mounted, let's fail with EINVAL when the ns_cgroup subsys is created (a printk will help the user to understand why the creation fails). Update the feature remove schedule file with the deprecated ns_cgroup. Signed-off-by: Daniel Lezcano Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 17 +++++++++++++++++ kernel/ns_cgroup.c | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d2af87ba96e1..f3da8c0a3af2 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -526,6 +526,23 @@ Who: FUJITA Tomonori ---------------------------- +What: namespace cgroup (ns_cgroup) +When: 2.6.38 +Why: The ns_cgroup leads to some problems: + * cgroup creation is out-of-control + * cgroup name can conflict when pids are looping + * it is not possible to have a single process handling + a lot of namespaces without falling in a exponential creation time + * we may want to create a namespace without creating a cgroup + + The ns_cgroup is replaced by a compatibility flag 'clone_children', + where a newly created cgroup will copy the parent cgroup values. + The userspace has to manually create a cgroup and add a task to + the 'tasks' file. +Who: Daniel Lezcano + +---------------------------- + What: iwlwifi disable_hw_scan module parameters When: 2.6.40 Why: Hareware scan is the prefer method for iwlwifi devices for diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 2a5dfec8efe0..2c98ad94ba0e 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c @@ -85,6 +85,14 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, return ERR_PTR(-EPERM); if (!cgroup_is_descendant(cgroup, current)) return ERR_PTR(-EPERM); + if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) { + printk("ns_cgroup can't be created with parent " + "'clone_children' set.\n"); + return ERR_PTR(-EINVAL); + } + + printk_once("ns_cgroup deprecated: consider using the " + "'clone_children' flag without the ns_cgroup.\n"); ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); if (!ns_cgroup) From 0c270f8f9988fb0d93ea214fdcff7ab90eb3d894 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:33:39 -0700 Subject: [PATCH 045/139] memcg: fix race in file_mapped accouting flag management Presently memory cgroup accounts file-mapped by counter and flag. counter is working in the same way with zone_stat but FileMapped flag only exists in memcg (for helping move_account). This flag can be updated wrongly in a case. Assume CPU0 and CPU1 and a thread mapping a page on CPU0, another thread unmapping it on CPU1. CPU0 CPU1 rmv rmap (mapcount 1->0) add rmap (mapcount 0->1) lock_page_cgroup() memcg counter+1 (some delay) set MAPPED FLAG. unlock_page_cgroup() lock_page_cgroup() memcg counter-1 clear MAPPED flag In the above sequence counter is properly updated but FLAG is not. This means that representing a state by a flag which is maintained by counter needs some special care. To handle this, when clearing a flag, this patch check mapcount directly and clear the flag only when mapcount == 0. (if mapcount >0, someone will make it to zero later and flag will be cleared.) Reverse case, dec-after-inc cannot be a problem because page_table_lock() works well for it. (IOW, to make above sequence, 2 processes should touch the same page at once with map/unmap.) Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9be3cf8a5da4..0e3fdbd809c7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1485,7 +1485,8 @@ void mem_cgroup_update_file_mapped(struct page *page, int val) SetPageCgroupFileMapped(pc); } else { __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - ClearPageCgroupFileMapped(pc); + if (!page_mapped(page)) /* for race between dec->inc counter */ + ClearPageCgroupFileMapped(pc); } done: From 32047e2a85f06633ee4c53e2d0346fbcd34e480b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:33:40 -0700 Subject: [PATCH 046/139] memcg: avoid lock in updating file_mapped (Was fix race in file_mapped accouting flag management At accounting file events per memory cgroup, we need to find memory cgroup via page_cgroup->mem_cgroup. Now, we use lock_page_cgroup() for guarantee pc->mem_cgroup is not overwritten while we make use of it. But, considering the context which page-cgroup for files are accessed, we can use alternative light-weight mutual execusion in the most case. At handling file-caches, the only race we have to take care of is "moving" account, IOW, overwriting page_cgroup->mem_cgroup. (See comment in the patch) Unlike charge/uncharge, "move" happens not so frequently. It happens only when rmdir() and task-moving (with a special settings.) This patch adds a race-checker for file-cache-status accounting v.s. account moving. The new per-cpu-per-memcg counter MEM_CGROUP_ON_MOVE is added. The routine for account move 1. Increment it before start moving 2. Call synchronize_rcu() 3. Decrement it after the end of moving. By this, file-status-counting routine can check it needs to call lock_page_cgroup(). In most case, I doesn't need to call it. Following is a perf data of a process which mmap()/munmap 32MB of file cache in a minute. Before patch: 28.25% mmap mmap [.] main 22.64% mmap [kernel.kallsyms] [k] page_fault 9.96% mmap [kernel.kallsyms] [k] mem_cgroup_update_file_mapped 3.67% mmap [kernel.kallsyms] [k] filemap_fault 3.50% mmap [kernel.kallsyms] [k] unmap_vmas 2.99% mmap [kernel.kallsyms] [k] __do_fault 2.76% mmap [kernel.kallsyms] [k] find_get_page After patch: 30.00% mmap mmap [.] main 23.78% mmap [kernel.kallsyms] [k] page_fault 5.52% mmap [kernel.kallsyms] [k] mem_cgroup_update_file_mapped 3.81% mmap [kernel.kallsyms] [k] unmap_vmas 3.26% mmap [kernel.kallsyms] [k] find_get_page 3.18% mmap [kernel.kallsyms] [k] __do_fault 3.03% mmap [kernel.kallsyms] [k] filemap_fault 2.40% mmap [kernel.kallsyms] [k] handle_mm_fault 2.40% mmap [kernel.kallsyms] [k] do_page_fault This patch reduces memcg's cost to some extent. (mem_cgroup_update_file_mapped is called by both of map/unmap) Note: It seems some more improvements are required..but no idea. maybe removing set/unset flag is required. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 99 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 14 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0e3fdbd809c7..dd845d25827a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -90,6 +90,7 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ MEM_CGROUP_EVENTS, /* incremented at every pagein/pageout */ + MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */ MEM_CGROUP_STAT_NSTATS, }; @@ -1051,7 +1052,46 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg) return swappiness; } -/* A routine for testing mem is not under move_account */ +static void mem_cgroup_start_move(struct mem_cgroup *mem) +{ + int cpu; + /* Because this is for moving account, reuse mc.lock */ + spin_lock(&mc.lock); + for_each_possible_cpu(cpu) + per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; + spin_unlock(&mc.lock); + + synchronize_rcu(); +} + +static void mem_cgroup_end_move(struct mem_cgroup *mem) +{ + int cpu; + + if (!mem) + return; + spin_lock(&mc.lock); + for_each_possible_cpu(cpu) + per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; + spin_unlock(&mc.lock); +} +/* + * 2 routines for checking "mem" is under move_account() or not. + * + * mem_cgroup_stealed() - checking a cgroup is mc.from or not. This is used + * for avoiding race in accounting. If true, + * pc->mem_cgroup may be overwritten. + * + * mem_cgroup_under_move() - checking a cgroup is mc.from or mc.to or + * under hierarchy of moving cgroups. This is for + * waiting at hith-memory prressure caused by "move". + */ + +static bool mem_cgroup_stealed(struct mem_cgroup *mem) +{ + VM_BUG_ON(!rcu_read_lock_held()); + return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0; +} static bool mem_cgroup_under_move(struct mem_cgroup *mem) { @@ -1462,35 +1502,62 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) /* * Currently used to update mapped file statistics, but the routine can be * generalized to update other statistics as well. + * + * Notes: Race condition + * + * We usually use page_cgroup_lock() for accessing page_cgroup member but + * it tends to be costly. But considering some conditions, we doesn't need + * to do so _always_. + * + * Considering "charge", lock_page_cgroup() is not required because all + * file-stat operations happen after a page is attached to radix-tree. There + * are no race with "charge". + * + * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup + * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even + * if there are race with "uncharge". Statistics itself is properly handled + * by flags. + * + * Considering "move", this is an only case we see a race. To make the race + * small, we check MEM_CGROUP_ON_MOVE percpu value and detect there are + * possibility of race condition. If there is, we take a lock. */ void mem_cgroup_update_file_mapped(struct page *page, int val) { struct mem_cgroup *mem; - struct page_cgroup *pc; + struct page_cgroup *pc = lookup_page_cgroup(page); + bool need_unlock = false; - pc = lookup_page_cgroup(page); if (unlikely(!pc)) return; - lock_page_cgroup(pc); + rcu_read_lock(); mem = pc->mem_cgroup; - if (!mem || !PageCgroupUsed(pc)) - goto done; - - /* - * Preemption is already disabled. We can use __this_cpu_xxx - */ + if (unlikely(!mem || !PageCgroupUsed(pc))) + goto out; + /* pc->mem_cgroup is unstable ? */ + if (unlikely(mem_cgroup_stealed(mem))) { + /* take a lock against to access pc->mem_cgroup */ + lock_page_cgroup(pc); + need_unlock = true; + mem = pc->mem_cgroup; + if (!mem || !PageCgroupUsed(pc)) + goto out; + } if (val > 0) { - __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); + this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); SetPageCgroupFileMapped(pc); } else { - __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); + this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); if (!page_mapped(page)) /* for race between dec->inc counter */ ClearPageCgroupFileMapped(pc); } -done: - unlock_page_cgroup(pc); +out: + if (unlikely(need_unlock)) + unlock_page_cgroup(pc); + rcu_read_unlock(); + return; } /* @@ -3039,6 +3106,7 @@ move_account: lru_add_drain_all(); drain_all_stock_sync(); ret = 0; + mem_cgroup_start_move(mem); for_each_node_state(node, N_HIGH_MEMORY) { for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { enum lru_list l; @@ -3052,6 +3120,7 @@ move_account: if (ret) break; } + mem_cgroup_end_move(mem); memcg_oom_recover(mem); /* it seems parent cgroup doesn't have enough mem */ if (ret == -ENOMEM) @@ -4514,6 +4583,7 @@ static void mem_cgroup_clear_mc(void) mc.to = NULL; mc.moving_task = NULL; spin_unlock(&mc.lock); + mem_cgroup_end_move(from); memcg_oom_recover(from); memcg_oom_recover(to); wake_up_all(&mc.waitq); @@ -4544,6 +4614,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, VM_BUG_ON(mc.moved_charge); VM_BUG_ON(mc.moved_swap); VM_BUG_ON(mc.moving_task); + mem_cgroup_start_move(from); spin_lock(&mc.lock); mc.from = from; mc.to = mem; From 7d74b06f240f1bd1b4b68dd6fe84164d8bf4e315 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:33:41 -0700 Subject: [PATCH 047/139] memcg: use for_each_mem_cgroup In memory cgroup management, we sometimes have to walk through subhierarchy of cgroup to gather informaiton, or lock something, etc. Now, to do that, mem_cgroup_walk_tree() function is provided. It calls given callback function per cgroup found. But the bad thing is that it has to pass a fixed style function and argument, "void*" and it adds much type casting to memcontrol.c. To make the code clean, this patch replaces walk_tree() with for_each_mem_cgroup_tree(iter, root) An iterator style call. The good point is that iterator call doesn't have to assume what kind of function is called under it. A bad point is that it may cause reference-count leak if a caller use "break" from the loop by mistake. I think the benefit is larger. The modified code seems straigtforward and easy to read because we don't have misterious callbacks and pointer cast. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 188 ++++++++++++++++++++++++------------------------ 1 file changed, 92 insertions(+), 96 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dd845d25827a..5e7a14d117c7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -660,41 +660,58 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) return mem; } -/* - * Call callback function against all cgroup under hierarchy tree. - */ -static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, - int (*func)(struct mem_cgroup *, void *)) +/* The caller has to guarantee "mem" exists before calling this */ +static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) { - int found, ret, nextid; - struct cgroup_subsys_state *css; - struct mem_cgroup *mem; - - if (!root->use_hierarchy) - return (*func)(root, data); - - nextid = 1; - do { - ret = 0; - mem = NULL; - - rcu_read_lock(); - css = css_get_next(&mem_cgroup_subsys, nextid, &root->css, - &found); - if (css && css_tryget(css)) - mem = container_of(css, struct mem_cgroup, css); - rcu_read_unlock(); - - if (mem) { - ret = (*func)(mem, data); - css_put(&mem->css); - } - nextid = found + 1; - } while (!ret && css); - - return ret; + if (mem && css_tryget(&mem->css)) + return mem; + return NULL; } +static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, + struct mem_cgroup *root, + bool cond) +{ + int nextid = css_id(&iter->css) + 1; + int found; + int hierarchy_used; + struct cgroup_subsys_state *css; + + hierarchy_used = iter->use_hierarchy; + + css_put(&iter->css); + if (!cond || !hierarchy_used) + return NULL; + + do { + iter = NULL; + rcu_read_lock(); + + css = css_get_next(&mem_cgroup_subsys, nextid, + &root->css, &found); + if (css && css_tryget(css)) + iter = container_of(css, struct mem_cgroup, css); + rcu_read_unlock(); + /* If css is NULL, no more cgroups will be found */ + nextid = found + 1; + } while (css && !iter); + + return iter; +} +/* + * for_eacn_mem_cgroup_tree() for visiting all cgroup under tree. Please + * be careful that "break" loop is not allowed. We have reference count. + * Instead of that modify "cond" to be false and "continue" to exit the loop. + */ +#define for_each_mem_cgroup_tree_cond(iter, root, cond) \ + for (iter = mem_cgroup_start_loop(root);\ + iter != NULL;\ + iter = mem_cgroup_get_next(iter, root, cond)) + +#define for_each_mem_cgroup_tree(iter, root) \ + for_each_mem_cgroup_tree_cond(iter, root, true) + + static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) { return (mem == root_mem_cgroup); @@ -1132,13 +1149,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem) return false; } -static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data) -{ - int *val = data; - (*val)++; - return 0; -} - /** * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. * @memcg: The memory cgroup that went over limit @@ -1213,7 +1223,10 @@ done: static int mem_cgroup_count_children(struct mem_cgroup *mem) { int num = 0; - mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb); + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, mem) + num++; return num; } @@ -1362,49 +1375,39 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, return total; } -static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data) -{ - int *val = (int *)data; - int x; - /* - * Logically, we can stop scanning immediately when we find - * a memcg is already locked. But condidering unlock ops and - * creation/removal of memcg, scan-all is simple operation. - */ - x = atomic_inc_return(&mem->oom_lock); - *val = max(x, *val); - return 0; -} /* * Check OOM-Killer is already running under our hierarchy. * If someone is running, return false. */ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) { - int lock_count = 0; + int x, lock_count = 0; + struct mem_cgroup *iter; - mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb); + for_each_mem_cgroup_tree(iter, mem) { + x = atomic_inc_return(&iter->oom_lock); + lock_count = max(x, lock_count); + } if (lock_count == 1) return true; return false; } -static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data) +static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) { + struct mem_cgroup *iter; + /* * When a new child is created while the hierarchy is under oom, * mem_cgroup_oom_lock() may not be called. We have to use * atomic_add_unless() here. */ - atomic_add_unless(&mem->oom_lock, -1, 0); + for_each_mem_cgroup_tree(iter, mem) + atomic_add_unless(&iter->oom_lock, -1, 0); return 0; } -static void mem_cgroup_oom_unlock(struct mem_cgroup *mem) -{ - mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb); -} static DEFINE_MUTEX(memcg_oom_mutex); static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); @@ -3207,33 +3210,25 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, return retval; } -struct mem_cgroup_idx_data { - s64 val; - enum mem_cgroup_stat_index idx; -}; -static int -mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) +static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, + enum mem_cgroup_stat_index idx) { - struct mem_cgroup_idx_data *d = data; - d->val += mem_cgroup_read_stat(mem, d->idx); - return 0; -} + struct mem_cgroup *iter; + s64 val = 0; -static void -mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, - enum mem_cgroup_stat_index idx, s64 *val) -{ - struct mem_cgroup_idx_data d; - d.idx = idx; - d.val = 0; - mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat); - *val = d.val; + /* each per cpu's value can be minus.Then, use s64 */ + for_each_mem_cgroup_tree(iter, mem) + val += mem_cgroup_read_stat(iter, idx); + + if (val < 0) /* race ? */ + val = 0; + return val; } static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) { - u64 idx_val, val; + u64 val; if (!mem_cgroup_is_root(mem)) { if (!swap) @@ -3242,16 +3237,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) return res_counter_read_u64(&mem->memsw, RES_USAGE); } - mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val); - val = idx_val; - mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val); - val += idx_val; + val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE); + val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS); - if (swap) { - mem_cgroup_get_recursive_idx_stat(mem, - MEM_CGROUP_STAT_SWAPOUT, &idx_val); - val += idx_val; - } + if (swap) + val += mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_SWAPOUT); return val << PAGE_SHIFT; } @@ -3459,9 +3450,9 @@ struct { }; -static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) +static void +mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) { - struct mcs_total_stat *s = data; s64 val; /* per cpu stat */ @@ -3491,13 +3482,15 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE); s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; - return 0; } static void mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) { - mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat); + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, mem) + mem_cgroup_get_local_stat(iter, s); } static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, @@ -3674,7 +3667,7 @@ static int compare_thresholds(const void *a, const void *b) return _a->threshold - _b->threshold; } -static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data) +static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem) { struct mem_cgroup_eventfd_list *ev; @@ -3685,7 +3678,10 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data) static void mem_cgroup_oom_notify(struct mem_cgroup *mem) { - mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb); + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, mem) + mem_cgroup_oom_notify_cb(iter); } static int mem_cgroup_usage_register_event(struct cgroup *cgrp, From 711d3d2c9bc3fb7cb5116352fecdb5b4adb6db6e Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:33:42 -0700 Subject: [PATCH 048/139] memcg: cpu hotplug aware percpu count updates Now, memcgroup's per cpu coutner uses for_each_possible_cpu() to get the value. It's better to use for_each_online_cpu() and a cpu hotplug handler. This patch only handles statistics counter. MEM_CGROUP_ON_MOVE will be handled in another patch. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 102 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 9 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5e7a14d117c7..31a1d3b71eee 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -89,7 +89,9 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ - MEM_CGROUP_EVENTS, /* incremented at every pagein/pageout */ + MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ + /* incremented at every pagein/pageout */ + MEM_CGROUP_EVENTS = MEM_CGROUP_STAT_DATA, MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */ MEM_CGROUP_STAT_NSTATS, @@ -255,6 +257,12 @@ struct mem_cgroup { * percpu counter. */ struct mem_cgroup_stat_cpu *stat; + /* + * used when a cpu is offlined or other synchronizations + * See mem_cgroup_read_stat(). + */ + struct mem_cgroup_stat_cpu nocpu_base; + spinlock_t pcp_counter_lock; }; /* Stuffs for move charges at task migration. */ @@ -531,14 +539,40 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) return mz; } +/* + * Implementation Note: reading percpu statistics for memcg. + * + * Both of vmstat[] and percpu_counter has threshold and do periodic + * synchronization to implement "quick" read. There are trade-off between + * reading cost and precision of value. Then, we may have a chance to implement + * a periodic synchronizion of counter in memcg's counter. + * + * But this _read() function is used for user interface now. The user accounts + * memory usage by memory cgroup and he _always_ requires exact value because + * he accounts memory. Even if we provide quick-and-fuzzy read, we always + * have to visit all online cpus and make sum. So, for now, unnecessary + * synchronization is not implemented. (just implemented for cpu hotplug) + * + * If there are kernel internal actions which can make use of some not-exact + * value, and reading all cpu value can be performance bottleneck in some + * common workload, threashold and synchonization as vmstat[] should be + * implemented. + */ static s64 mem_cgroup_read_stat(struct mem_cgroup *mem, enum mem_cgroup_stat_index idx) { int cpu; s64 val = 0; - for_each_possible_cpu(cpu) + get_online_cpus(); + for_each_online_cpu(cpu) val += per_cpu(mem->stat->count[idx], cpu); +#ifdef CONFIG_HOTPLUG_CPU + spin_lock(&mem->pcp_counter_lock); + val += mem->nocpu_base.count[idx]; + spin_unlock(&mem->pcp_counter_lock); +#endif + put_online_cpus(); return val; } @@ -663,9 +697,28 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) /* The caller has to guarantee "mem" exists before calling this */ static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) { - if (mem && css_tryget(&mem->css)) - return mem; - return NULL; + struct cgroup_subsys_state *css; + int found; + + if (!mem) /* ROOT cgroup has the smallest ID */ + return root_mem_cgroup; /*css_put/get against root is ignored*/ + if (!mem->use_hierarchy) { + if (css_tryget(&mem->css)) + return mem; + return NULL; + } + rcu_read_lock(); + /* + * searching a memory cgroup which has the smallest ID under given + * ROOT cgroup. (ID >= 1) + */ + css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found); + if (css && css_tryget(css)) + mem = container_of(css, struct mem_cgroup, css); + else + mem = NULL; + rcu_read_unlock(); + return mem; } static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, @@ -680,9 +733,13 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, hierarchy_used = iter->use_hierarchy; css_put(&iter->css); - if (!cond || !hierarchy_used) + /* If no ROOT, walk all, ignore hierarchy */ + if (!cond || (root && !hierarchy_used)) return NULL; + if (!root) + root = root_mem_cgroup; + do { iter = NULL; rcu_read_lock(); @@ -711,6 +768,9 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, #define for_each_mem_cgroup_tree(iter, root) \ for_each_mem_cgroup_tree_cond(iter, root, true) +#define for_each_mem_cgroup_all(iter) \ + for_each_mem_cgroup_tree_cond(iter, NULL, true) + static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) { @@ -1676,15 +1736,38 @@ static void drain_all_stock_sync(void) atomic_dec(&memcg_drain_count); } -static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb, +/* + * This function drains percpu counter value from DEAD cpu and + * move it to local cpu. Note that this function can be preempted. + */ +static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) +{ + int i; + + spin_lock(&mem->pcp_counter_lock); + for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { + s64 x = per_cpu(mem->stat->count[i], cpu); + + per_cpu(mem->stat->count[i], cpu) = 0; + mem->nocpu_base.count[i] += x; + } + spin_unlock(&mem->pcp_counter_lock); +} + +static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { int cpu = (unsigned long)hcpu; struct memcg_stock_pcp *stock; + struct mem_cgroup *iter; - if (action != CPU_DEAD) + if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN) return NOTIFY_OK; + + for_each_mem_cgroup_all(iter) + mem_cgroup_drain_pcp_counter(iter, cpu); + stock = &per_cpu(memcg_stock, cpu); drain_stock(stock); return NOTIFY_OK; @@ -4098,6 +4181,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) vfree(mem); mem = NULL; } + spin_lock_init(&mem->pcp_counter_lock); return mem; } @@ -4224,7 +4308,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) &per_cpu(memcg_stock, cpu); INIT_WORK(&stock->work, drain_local_stock); } - hotcpu_notifier(memcg_stock_cpu_callback, 0); + hotcpu_notifier(memcg_cpu_hotplug_callback, 0); } else { parent = mem_cgroup_from_cont(cont->parent); mem->use_hierarchy = parent->use_hierarchy; From 1489ebad8b5b20300562f634f279cb9c435fd90b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:33:42 -0700 Subject: [PATCH 049/139] memcg: cpu hotplug aware quick acount_move detection An event counter MEM_CGROUP_ON_MOVE is used for quick check whether file stat update can be done in async manner or not. Now, it use percpu counter and for_each_possible_cpu to update. This patch replaces for_each_possible_cpu to for_each_online_cpu and adds necessary synchronization logic at CPU HOTPLUG. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 31a1d3b71eee..52840adae62a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1132,11 +1132,14 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg) static void mem_cgroup_start_move(struct mem_cgroup *mem) { int cpu; - /* Because this is for moving account, reuse mc.lock */ - spin_lock(&mc.lock); - for_each_possible_cpu(cpu) + + get_online_cpus(); + spin_lock(&mem->pcp_counter_lock); + for_each_online_cpu(cpu) per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; - spin_unlock(&mc.lock); + mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; + spin_unlock(&mem->pcp_counter_lock); + put_online_cpus(); synchronize_rcu(); } @@ -1147,10 +1150,13 @@ static void mem_cgroup_end_move(struct mem_cgroup *mem) if (!mem) return; - spin_lock(&mc.lock); - for_each_possible_cpu(cpu) + get_online_cpus(); + spin_lock(&mem->pcp_counter_lock); + for_each_online_cpu(cpu) per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; - spin_unlock(&mc.lock); + mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; + spin_unlock(&mem->pcp_counter_lock); + put_online_cpus(); } /* * 2 routines for checking "mem" is under move_account() or not. @@ -1751,6 +1757,17 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) per_cpu(mem->stat->count[i], cpu) = 0; mem->nocpu_base.count[i] += x; } + /* need to clear ON_MOVE value, works as a kind of lock. */ + per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; + spin_unlock(&mem->pcp_counter_lock); +} + +static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu) +{ + int idx = MEM_CGROUP_ON_MOVE; + + spin_lock(&mem->pcp_counter_lock); + per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx]; spin_unlock(&mem->pcp_counter_lock); } @@ -1762,6 +1779,12 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, struct memcg_stock_pcp *stock; struct mem_cgroup *iter; + if ((action == CPU_ONLINE)) { + for_each_mem_cgroup_all(iter) + synchronize_mem_cgroup_on_move(iter, cpu); + return NOTIFY_OK; + } + if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN) return NOTIFY_OK; From 26174efd42100eefac67732c0c12f41a205fa335 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:33:43 -0700 Subject: [PATCH 050/139] memcg: generic filestat update interface This patch extracts the core logic from mem_cgroup_update_file_mapped() as mem_cgroup_update_file_stat() and adds a wrapper. As a planned future update, memory cgroup has to count dirty pages to implement dirty_ratio/limit. And more, the number of dirty pages is required to kick flusher thread to start writeback. (Now, no kick.) This patch is preparation for it and makes other statistics implementation clearer. Just a clean up. Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Reviewed-by: Greg Thelen Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 52840adae62a..9a99cfaf0a19 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1591,7 +1591,8 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) * small, we check MEM_CGROUP_ON_MOVE percpu value and detect there are * possibility of race condition. If there is, we take a lock. */ -void mem_cgroup_update_file_mapped(struct page *page, int val) + +static void mem_cgroup_update_file_stat(struct page *page, int idx, int val) { struct mem_cgroup *mem; struct page_cgroup *pc = lookup_page_cgroup(page); @@ -1613,13 +1614,18 @@ void mem_cgroup_update_file_mapped(struct page *page, int val) if (!mem || !PageCgroupUsed(pc)) goto out; } - if (val > 0) { - this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - SetPageCgroupFileMapped(pc); - } else { - this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - if (!page_mapped(page)) /* for race between dec->inc counter */ + + this_cpu_add(mem->stat->count[idx], val); + + switch (idx) { + case MEM_CGROUP_STAT_FILE_MAPPED: + if (val > 0) + SetPageCgroupFileMapped(pc); + else if (!page_mapped(page)) ClearPageCgroupFileMapped(pc); + break; + default: + BUG(); } out: @@ -1629,6 +1635,11 @@ out: return; } +void mem_cgroup_update_file_mapped(struct page *page, int val) +{ + mem_cgroup_update_file_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, val); +} + /* * size of first charge trial. "32" comes from vmscan.c's magic value. * TODO: maybe necessary to use big numbers in big irons. From c4b5ed250eebf854d40f27b43362c80f115cb57a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:44 -0700 Subject: [PATCH 051/139] ptrace: annotate lock context change on exit_ptrace() exit_ptrace() releases and regrabs tasklist_lock but was missing proper annotation. Add it. Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Cc: Ingo Molnar Cc: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f34d798ef4a2..4afd9b86cc0b 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -329,6 +329,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data) * and reacquire the lock. */ void exit_ptrace(struct task_struct *tracer) + __releases(&tasklist_lock) + __acquires(&tasklist_lock) { struct task_struct *p, *n; LIST_HEAD(ptrace_dead); From 4abf986960ecda6a87fc2f795aacf888a2f0127e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:45 -0700 Subject: [PATCH 052/139] ptrace: change signature of sys_ptrace() and friends Since userspace API of ptrace syscall defines @addr and @data as void pointers, it would be more appropriate to define them as unsigned long in kernel. Therefore related functions are changed also. 'unsigned long' is typically used in other places in kernel as an opaque data type and that using this helps cleaning up a lot of warnings from sparse. Suggested-by: Arnd Bergmann Signed-off-by: Namhyung Kim Acked-by: Arnd Bergmann Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 9 ++++++--- include/linux/syscalls.h | 3 ++- kernel/ptrace.c | 16 ++++++++++------ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 4272521e29e9..67a4cd77c352 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -108,7 +108,8 @@ extern int ptrace_attach(struct task_struct *tsk); extern int ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); -extern int ptrace_request(struct task_struct *child, long request, long addr, long data); +extern int ptrace_request(struct task_struct *child, long request, + unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); @@ -132,8 +133,10 @@ static inline void ptrace_unlink(struct task_struct *child) __ptrace_unlink(child); } -int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data); -int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); +int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, + unsigned long data); +int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, + unsigned long data); /** * task_ptrace - return %PT_* flags that apply to a task diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e6319d18a55d..cacc27a0e285 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -701,7 +701,8 @@ asmlinkage long sys_nfsservctl(int cmd, asmlinkage long sys_syslog(int type, char __user *buf, int len); asmlinkage long sys_uselib(const char __user *library); asmlinkage long sys_ni_syscall(void); -asmlinkage long sys_ptrace(long request, long pid, long addr, long data); +asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, + unsigned long data); asmlinkage long sys_add_key(const char __user *_type, const char __user *_description, diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4afd9b86cc0b..06981a8b271b 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -404,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds return copied; } -static int ptrace_setoptions(struct task_struct *child, long data) +static int ptrace_setoptions(struct task_struct *child, unsigned long data) { child->ptrace &= ~PT_TRACE_MASK; @@ -483,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) #define is_sysemu_singlestep(request) 0 #endif -static int ptrace_resume(struct task_struct *child, long request, long data) +static int ptrace_resume(struct task_struct *child, long request, + unsigned long data) { if (!valid_signal(data)) return -EIO; @@ -560,7 +561,7 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, #endif int ptrace_request(struct task_struct *child, long request, - long addr, long data) + unsigned long addr, unsigned long data) { int ret = -EIO; siginfo_t siginfo; @@ -693,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) +SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, + unsigned long, data) { struct task_struct *child; long ret; @@ -734,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) return ret; } -int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { unsigned long tmp; int copied; @@ -745,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) return put_user(tmp, (unsigned long __user *)data); } -int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { int copied; From 9fed81dc40f5a1ac2783bcc78d4029873be72894 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:46 -0700 Subject: [PATCH 053/139] ptrace: cleanup ptrace_request() Use new 'datavp' and 'datalp' variables to remove unnecesary castings. Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 06981a8b271b..ea7ce0215cd1 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -565,6 +565,8 @@ int ptrace_request(struct task_struct *child, long request, { int ret = -EIO; siginfo_t siginfo; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; switch (request) { case PTRACE_PEEKTEXT: @@ -581,19 +583,17 @@ int ptrace_request(struct task_struct *child, long request, ret = ptrace_setoptions(child, data); break; case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned long __user *) data); + ret = put_user(child->ptrace_message, datalp); break; case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) - ret = copy_siginfo_to_user((siginfo_t __user *) data, - &siginfo); + ret = copy_siginfo_to_user(datavp, &siginfo); break; case PTRACE_SETSIGINFO: - if (copy_from_user(&siginfo, (siginfo_t __user *) data, - sizeof siginfo)) + if (copy_from_user(&siginfo, datavp, sizeof siginfo)) ret = -EFAULT; else ret = ptrace_setsiginfo(child, &siginfo); @@ -624,7 +624,7 @@ int ptrace_request(struct task_struct *child, long request, } mmput(mm); - ret = put_user(tmp, (unsigned long __user *) data); + ret = put_user(tmp, datalp); break; } #endif @@ -653,7 +653,7 @@ int ptrace_request(struct task_struct *child, long request, case PTRACE_SETREGSET: { struct iovec kiov; - struct iovec __user *uiov = (struct iovec __user *) data; + struct iovec __user *uiov = datavp; if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) return -EFAULT; From 9b05a69e0534ec70bc94921936ffa05b330507cb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:47 -0700 Subject: [PATCH 054/139] ptrace: change signature of arch_ptrace() Fix up the arguments to arch_ptrace() to take account of the fact that @addr and @data are now unsigned long rather than long as of a preceding patch in this series. Signed-off-by: Namhyung Kim Cc: Acked-by: Roland McGrath Acked-by: David Howells Acked-by: Geert Uytterhoeven Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 7 ++++--- arch/arm/kernel/ptrace.c | 3 ++- arch/avr32/kernel/ptrace.c | 3 ++- arch/blackfin/kernel/ptrace.c | 3 ++- arch/cris/arch-v10/kernel/ptrace.c | 7 ++++--- arch/cris/arch-v32/kernel/ptrace.c | 3 ++- arch/frv/kernel/ptrace.c | 3 ++- arch/h8300/kernel/ptrace.c | 7 ++++--- arch/ia64/kernel/ptrace.c | 3 ++- arch/m32r/kernel/ptrace.c | 3 ++- arch/m68k/kernel/ptrace.c | 9 +++++---- arch/m68knommu/kernel/ptrace.c | 7 ++++--- arch/microblaze/kernel/ptrace.c | 3 ++- arch/mips/kernel/ptrace.c | 3 ++- arch/mn10300/kernel/ptrace.c | 3 ++- arch/parisc/kernel/ptrace.c | 11 ++++++----- arch/powerpc/kernel/ptrace.c | 15 ++++++++------- arch/s390/kernel/ptrace.c | 3 ++- arch/score/kernel/ptrace.c | 3 ++- arch/sh/kernel/ptrace_32.c | 21 +++++++++++---------- arch/sh/kernel/ptrace_64.c | 6 ++++-- arch/sparc/kernel/ptrace_32.c | 3 ++- arch/sparc/kernel/ptrace_64.c | 7 ++++--- arch/tile/kernel/ptrace.c | 9 ++++++--- arch/um/kernel/ptrace.c | 5 +++-- arch/um/sys-i386/ptrace.c | 4 ++-- arch/um/sys-x86_64/ptrace.c | 4 ++-- arch/x86/kernel/ptrace.c | 7 ++++--- arch/xtensa/kernel/ptrace.c | 3 ++- include/linux/ptrace.h | 3 ++- 30 files changed, 101 insertions(+), 70 deletions(-) diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index baa903602f6a..e2af5eb59bb4 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -269,7 +269,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; size_t copied; @@ -292,7 +293,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_PEEKUSR: force_successful_syscall_return(); ret = get_reg(child, addr); - DBG(DBG_MEM, ("peek $%ld->%#lx\n", addr, ret)); + DBG(DBG_MEM, ("peek $%lu->%#lx\n", addr, ret)); break; /* When I and D space are separate, this will have to be fixed. */ @@ -302,7 +303,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_POKEUSR: /* write the specified register */ - DBG(DBG_MEM, ("poke $%ld<-%#lx\n", addr, data)); + DBG(DBG_MEM, ("poke $%lu<-%#lx\n", addr, data)); ret = put_reg(child, addr, data); break; default: diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index e0cb6370ed14..9bca6165459e 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -1075,7 +1075,8 @@ out: } #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index 5e73c25f8f85..ecea9b6bfab4 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -146,7 +146,8 @@ static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs) return ret; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index b35839354130..8e3083ccd88a 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -240,7 +240,8 @@ void user_disable_single_step(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SINGLESTEP); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index e70c804e9377..d411e024e05f 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -76,7 +76,8 @@ ptrace_disable(struct task_struct *child) * (in user space) where the result of the ptrace call is written (instead of * being returned). */ -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; @@ -141,7 +142,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - data += sizeof(long); + data += sizeof(unsigned long); } break; @@ -165,7 +166,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f4ebd1e7d0f5..3e058a121753 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -126,7 +126,8 @@ ptrace_disable(struct task_struct *child) } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index fac028936a04..e9dbfad93589 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -254,7 +254,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int ret; diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index df114122ebdf..ef1aa0b8b20f 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -50,7 +50,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -120,7 +121,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EFAULT; break; } - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; @@ -135,7 +136,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } h8300_put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 7c7909f9bc93..8848f43d819e 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1177,7 +1177,8 @@ ptrace_disable (struct task_struct *child) } long -arch_ptrace (struct task_struct *child, long request, long addr, long data) +arch_ptrace (struct task_struct *child, long request, + unsigned long addr, unsigned long data) { switch (request) { case PTRACE_PEEKTEXT: diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 0021ade4cba8..5e00e0a41fff 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -622,7 +622,8 @@ void ptrace_disable(struct task_struct *child) } long -arch_ptrace(struct task_struct *child, long request, long addr, long data) +arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 616e59752c29..583f59fcc363 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -156,7 +156,8 @@ void user_disable_single_step(struct task_struct *child) singlestep_disable(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int i, ret = 0; @@ -200,7 +201,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) * into internal fpu reg representation */ if (FPU_IS_EMU && (addr < 45) && !(addr % 3)) { - data = (unsigned long)data << 15; + data <<= 15; data = (data & 0xffff0000) | ((data & 0x0000ffff) >> 1); } @@ -215,7 +216,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = put_user(tmp, (unsigned long *)data); if (ret) break; - data += sizeof(long); + data += sizeof(unsigned long); } break; @@ -229,7 +230,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp |= get_reg(child, PT_SR) & ~SR_MASK; } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } break; diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c index 6fe7c38cd556..7dbb08f5534e 100644 --- a/arch/m68knommu/kernel/ptrace.c +++ b/arch/m68knommu/kernel/ptrace.c @@ -112,7 +112,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -184,7 +185,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EFAULT; break; } - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; @@ -204,7 +205,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp |= get_reg(child, PT_SR) & ~(SR_MASK << 16); } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index dc03ffc8174a..3544bc157406 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -73,7 +73,8 @@ static microblaze_reg_t *reg_save_addr(unsigned reg_offs, return (microblaze_reg_t *)((char *)regs + reg_offs); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int rval; unsigned long val = 0; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c8777333e198..95c3ae8b198c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -255,7 +255,8 @@ int ptrace_set_watch_regs(struct task_struct *child, return 0; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index cf847dabc1bd..ec4b41439e99 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -295,7 +295,8 @@ void ptrace_disable(struct task_struct *child) /* * handle the arch-specific side of process tracing */ -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int ret; diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index c4f49e45129d..03920db4af45 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -110,7 +110,8 @@ void user_enable_block_step(struct task_struct *task) pa_psw(task)->l = 0; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; long ret = -EIO; @@ -120,8 +121,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* Read the word at location addr in the USER area. For ptraced processes, the kernel saves all regs on a syscall. */ case PTRACE_PEEKUSR: - if ((addr & (sizeof(long)-1)) || - (unsigned long) addr >= sizeof(struct pt_regs)) + if ((addr & (sizeof(unsigned long)-1)) || + addr >= sizeof(struct pt_regs)) break; tmp = *(unsigned long *) ((char *) task_regs(child) + addr); ret = put_user(tmp, (unsigned long *) data); @@ -151,8 +152,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - if ((addr & (sizeof(long)-1)) || - (unsigned long) addr >= sizeof(struct pt_regs)) + if ((addr & (sizeof(unsigned long)-1)) || + addr >= sizeof(struct pt_regs)) break; if ((addr >= PT_GR1 && addr <= PT_GR31) || addr == PT_IAOQ0 || addr == PT_IAOQ1 || diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 286d9783d93f..136763568a7b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1406,8 +1406,8 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version */ -static long arch_ptrace_old(struct task_struct *child, long request, long addr, - long data) +static long arch_ptrace_old(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { switch (request) { case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ @@ -1434,7 +1434,8 @@ static long arch_ptrace_old(struct task_struct *child, long request, long addr, return -EPERM; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EPERM; @@ -1446,11 +1447,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; @@ -1474,11 +1475,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 83339d33c4b1..019bb714db49 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -343,7 +343,8 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) return __poke_user(child, addr, data); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { ptrace_area parea; int copied, ret; diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 174c6422b096..894dcbf72099 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -325,7 +325,8 @@ void ptrace_disable(struct task_struct *child) } long -arch_ptrace(struct task_struct *child, long request, long addr, long data) +arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (void __user *)data; diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 2cd42b58cb20..34bf03745e86 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -365,7 +365,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sh_native_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { struct user * dummy = NULL; unsigned long __user *datap = (unsigned long __user *)data; @@ -383,17 +384,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr < sizeof(struct pt_regs)) tmp = get_stack_long(child, addr); - else if (addr >= (long) &dummy->fpu && - addr < (long) &dummy->u_fpvalid) { + else if (addr >= (unsigned long) &dummy->fpu && + addr < (unsigned long) &dummy->u_fpvalid) { if (!tsk_used_math(child)) { - if (addr == (long)&dummy->fpu.fpscr) + if (addr == (unsigned long)&dummy->fpu.fpscr) tmp = FPSCR_INIT; else tmp = 0; } else - tmp = ((long *)child->thread.xstate) + tmp = ((unsigned long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2]; - } else if (addr == (long) &dummy->u_fpvalid) + } else if (addr == (unsigned long) &dummy->u_fpvalid) tmp = !!tsk_used_math(child); else if (addr == PT_TEXT_ADDR) tmp = child->mm->start_code; @@ -417,13 +418,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr < sizeof(struct pt_regs)) ret = put_stack_long(child, addr, data); - else if (addr >= (long) &dummy->fpu && - addr < (long) &dummy->u_fpvalid) { + else if (addr >= (unsigned long) &dummy->fpu && + addr < (unsigned long) &dummy->u_fpvalid) { set_stopped_child_used_math(child); - ((long *)child->thread.xstate) + ((unsigned long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2] = data; ret = 0; - } else if (addr == (long) &dummy->u_fpvalid) { + } else if (addr == (unsigned long) &dummy->u_fpvalid) { conditional_stopped_child_used_math(data, child); ret = 0; } diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index e0fb065914aa..4840716c196a 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -383,7 +383,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sh64_native_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -471,7 +472,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return ret; } -asmlinkage int sh64_ptrace(long request, long pid, long addr, long data) +asmlinkage int sh64_ptrace(long request, long pid, + unsigned long addr, unsigned long data) { #define WPC_DBRMODE 0x0d104008 static unsigned long first_call; diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index e608f397e11f..e08ba4a46acd 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -323,7 +323,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sparc32_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long addr2 = current->thread.kregs->u_regs[UREG_I4]; const struct user_regset_view *view; diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index aa90da08bf61..d9db5a4dfef9 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -969,7 +969,8 @@ struct fps { unsigned long fsr; }; -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { const struct user_regset_view *view = task_user_regset_view(current); unsigned long addr2 = task_pt_regs(current)->u_regs[UREG_I4]; @@ -977,8 +978,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) struct fps __user *fps; int ret; - pregs = (struct pt_regs __user *) (unsigned long) addr; - fps = (struct fps __user *) (unsigned long) addr; + pregs = (struct pt_regs __user *) addr; + fps = (struct fps __user *) addr; switch (request) { case PTRACE_PEEKUSR: diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 5b20c2874d51..f634729211d1 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -45,7 +45,8 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; @@ -98,7 +99,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __put_user(childregs[i], &datap[i]); if (ret != 0) break; @@ -109,7 +111,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __get_user(childregs[i], &datap[i]); if (ret != 0) break; diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index e0510496596c..963d82bdec06 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -42,10 +42,11 @@ void ptrace_disable(struct task_struct *child) extern int peek_user(struct task_struct * child, long addr, long data); extern int poke_user(struct task_struct * child, long addr, long data); -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int i, ret; - unsigned long __user *p = (void __user *)(unsigned long)data; + unsigned long __user *p = (void __user *)data; switch (request) { /* read word at location addr. */ diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index c9b176534d65..d23b2d3ea384 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -203,8 +203,8 @@ int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) (unsigned long *) &fpregs); } -long subarch_ptrace(struct task_struct *child, long request, long addr, - long data) +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { return -EIO; } diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index f3458d7d1c5a..67e63680df28 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -175,8 +175,8 @@ int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) return restore_fp_registers(userspace_pid[cpu], fpregs); } -long subarch_ptrace(struct task_struct *child, long request, long addr, - long data) +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EIO; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 70c4872cd8aa..1a7ca045920d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child) static const struct user_regset_view user_x86_32_view; /* Initialized below. */ #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; @@ -888,14 +889,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION case PTRACE_GET_THREAD_AREA: - if (addr < 0) + if ((int) addr < 0) return -EIO; ret = do_get_thread_area(child, addr, (struct user_desc __user *) data); break; case PTRACE_SET_THREAD_AREA: - if (addr < 0) + if ((int) addr < 0) return -EIO; ret = do_set_thread_area(child, addr, (struct user_desc __user *) data, 0); diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 9d4e1ceb3f09..af9ba80f254c 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -256,7 +256,8 @@ int ptrace_pokeusr(struct task_struct *child, long regno, long val) return 0; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EPERM; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67a4cd77c352..092a04f874a8 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -100,7 +100,8 @@ #include /* For struct task_struct. */ -extern long arch_ptrace(struct task_struct *child, long request, long addr, long data); +extern long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data); extern int ptrace_traceme(void); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); From eb5a3699311ba8ed22b7b38ceb3bb1411e438e2a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:48 -0700 Subject: [PATCH 055/139] ptrace: cleanup arch_ptrace() on x86 Remove checking @addr less than 0 because @addr is now unsigned and use new udescp variable in order to remove unnecessary castings. [akpm@linux-foundation.org: fix unused variable 'udescp'] Signed-off-by: Namhyung Kim Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/ptrace.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 1a7ca045920d..45892dc4b72a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -813,8 +813,7 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long tmp; ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr < 0 || - addr >= sizeof(struct user)) + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; tmp = 0; /* Default return condition */ @@ -831,8 +830,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr < 0 || - addr >= sizeof(struct user)) + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; if (addr < sizeof(struct user_regs_struct)) @@ -892,14 +890,14 @@ long arch_ptrace(struct task_struct *child, long request, if ((int) addr < 0) return -EIO; ret = do_get_thread_area(child, addr, - (struct user_desc __user *) data); + (struct user_desc __user *)data); break; case PTRACE_SET_THREAD_AREA: if ((int) addr < 0) return -EIO; ret = do_set_thread_area(child, addr, - (struct user_desc __user *) data, 0); + (struct user_desc __user *)data, 0); break; #endif From b640a0d192265c47bbf60951115bdb59d2c017d1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:48 -0700 Subject: [PATCH 056/139] ptrace: cleanup arch_ptrace() on ARM use new 'datap' variable in order to remove unnecessary castings. Signed-off-by: Namhyung Kim Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/ptrace.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 9bca6165459e..3e97483abcf0 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -1079,10 +1079,11 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { case PTRACE_PEEKUSR: - ret = ptrace_read_user(child, addr, (unsigned long __user *)data); + ret = ptrace_read_user(child, addr, datap); break; case PTRACE_POKEUSR: @@ -1090,34 +1091,34 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: - ret = ptrace_getregs(child, (void __user *)data); + ret = ptrace_getregs(child, datap); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, (void __user *)data); + ret = ptrace_setregs(child, datap); break; case PTRACE_GETFPREGS: - ret = ptrace_getfpregs(child, (void __user *)data); + ret = ptrace_getfpregs(child, datap); break; case PTRACE_SETFPREGS: - ret = ptrace_setfpregs(child, (void __user *)data); + ret = ptrace_setfpregs(child, datap); break; #ifdef CONFIG_IWMMXT case PTRACE_GETWMMXREGS: - ret = ptrace_getwmmxregs(child, (void __user *)data); + ret = ptrace_getwmmxregs(child, datap); break; case PTRACE_SETWMMXREGS: - ret = ptrace_setwmmxregs(child, (void __user *)data); + ret = ptrace_setwmmxregs(child, datap); break; #endif case PTRACE_GET_THREAD_AREA: ret = put_user(task_thread_info(child)->tp_value, - (unsigned long __user *) data); + datap); break; case PTRACE_SET_SYSCALL: @@ -1127,21 +1128,21 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_CRUNCH case PTRACE_GETCRUNCHREGS: - ret = ptrace_getcrunchregs(child, (void __user *)data); + ret = ptrace_getcrunchregs(child, datap); break; case PTRACE_SETCRUNCHREGS: - ret = ptrace_setcrunchregs(child, (void __user *)data); + ret = ptrace_setcrunchregs(child, datap); break; #endif #ifdef CONFIG_VFP case PTRACE_GETVFPREGS: - ret = ptrace_getvfpregs(child, (void __user *)data); + ret = ptrace_getvfpregs(child, datap); break; case PTRACE_SETVFPREGS: - ret = ptrace_setvfpregs(child, (void __user *)data); + ret = ptrace_setvfpregs(child, datap); break; #endif From 9f29b8fb416a0ad49d5077ab10ed780efdfcb126 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:50 -0700 Subject: [PATCH 057/139] ptrace: cleanup arch_ptrace() on avr32 use new 'datap' variable type of void pointer in order to remove unnecessary castings. Signed-off-by: Namhyung Kim Acked-by: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/avr32/kernel/ptrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index ecea9b6bfab4..4aedcab7cd4b 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -150,6 +150,7 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + void __user *datap = (void __user *) data; switch (request) { /* Read the word at location addr in the child process */ @@ -159,8 +160,7 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_PEEKUSR: - ret = ptrace_read_user(child, addr, - (unsigned long __user *)data); + ret = ptrace_read_user(child, addr, datap); break; /* Write the word in data at location addr */ @@ -174,11 +174,11 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: - ret = ptrace_getregs(child, (void __user *)data); + ret = ptrace_getregs(child, datap); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, (const void __user *)data); + ret = ptrace_setregs(child, datap); break; default: From aeebd3a3d9f5b67a254f04e6eac91a74c5e1065e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:51 -0700 Subject: [PATCH 058/139] ptrace: cleanup arch_ptrace() and friends on Blackfin Change signature of get/put_reg() according to the change of arch_ptrace() and remove unnecessary castings. Signed-off-by: Namhyung Kim Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/blackfin/kernel/ptrace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 8e3083ccd88a..75089f80855d 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -38,12 +38,13 @@ * Get contents of register REGNO in task TASK. */ static inline long -get_reg(struct task_struct *task, long regno, unsigned long __user *datap) +get_reg(struct task_struct *task, unsigned long regno, + unsigned long __user *datap) { long tmp; struct pt_regs *regs = task_pt_regs(task); - if (regno & 3 || regno > PT_LAST_PSEUDO || regno < 0) + if (regno & 3 || regno > PT_LAST_PSEUDO) return -EIO; switch (regno) { @@ -74,11 +75,11 @@ get_reg(struct task_struct *task, long regno, unsigned long __user *datap) * Write contents of register REGNO in task TASK. */ static inline int -put_reg(struct task_struct *task, long regno, unsigned long data) +put_reg(struct task_struct *task, unsigned long regno, unsigned long data) { struct pt_regs *regs = task_pt_regs(task); - if (regno & 3 || regno > PT_LAST_PSEUDO || regno < 0) + if (regno & 3 || regno > PT_LAST_PSEUDO) return -EIO; switch (regno) { @@ -369,14 +370,14 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_to_user(child, &user_bfin_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (void __user *)data); + datap); case PTRACE_SETREGS: pr_debug("ptrace: PTRACE_SETREGS\n"); return copy_regset_from_user(child, &user_bfin_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (const void __user *)data); + datap); case_default: default: From 475a4b813816fe9af4cdf7833799036b679f46d0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:51 -0700 Subject: [PATCH 059/139] ptrace: cleanup arch_ptrace() on cris Use new 'regno' variable in order to remove redandunt expression and remove checking @addr less than 0 because @addr is now unsigned. Also update 'datap' on PTRACE_GET/SETREGS to fix a bug on arch-v10. Signed-off-by: Namhyung Kim Acked-by: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/ptrace.c | 17 ++++++++--------- arch/cris/arch-v32/kernel/ptrace.c | 13 ++++++------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index d411e024e05f..320065f3cbe5 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -80,6 +80,7 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + unsigned int regno = addr >> 2; unsigned long __user *datap = (unsigned long __user *)data; switch (request) { @@ -94,10 +95,10 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long tmp; ret = -EIO; - if ((addr & 3) || addr < 0 || addr > PT_MAX << 2) + if ((addr & 3) || regno > PT_MAX) break; - tmp = get_reg(child, addr >> 2); + tmp = get_reg(child, regno); ret = put_user(tmp, datap); break; } @@ -111,19 +112,17 @@ long arch_ptrace(struct task_struct *child, long request, /* Write the word at location address in the USER area. */ case PTRACE_POKEUSR: ret = -EIO; - if ((addr & 3) || addr < 0 || addr > PT_MAX << 2) + if ((addr & 3) || regno > PT_MAX) break; - addr >>= 2; - - if (addr == PT_DCCR) { + if (regno == PT_DCCR) { /* don't allow the tracing process to change stuff like * interrupt enable, kernel/user bit, dma enables etc. */ data &= DCCR_MASK; data |= get_reg(child, PT_DCCR) & ~DCCR_MASK; } - if (put_reg(child, addr, data)) + if (put_reg(child, regno, data)) break; ret = 0; break; @@ -142,7 +141,7 @@ long arch_ptrace(struct task_struct *child, long request, break; } - data += sizeof(unsigned long); + datap++; } break; @@ -166,7 +165,7 @@ long arch_ptrace(struct task_struct *child, long request, } put_reg(child, i, tmp); - data += sizeof(unsigned long); + datap++; } break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index 3e058a121753..511ece94a574 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -130,6 +130,7 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + unsigned int regno = addr >> 2; unsigned long __user *datap = (unsigned long __user *)data; switch (request) { @@ -164,10 +165,10 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long tmp; ret = -EIO; - if ((addr & 3) || addr < 0 || addr > PT_MAX << 2) + if ((addr & 3) || regno > PT_MAX) break; - tmp = get_reg(child, addr >> 2); + tmp = get_reg(child, regno); ret = put_user(tmp, datap); break; } @@ -181,19 +182,17 @@ long arch_ptrace(struct task_struct *child, long request, /* Write the word at location address in the USER area. */ case PTRACE_POKEUSR: ret = -EIO; - if ((addr & 3) || addr < 0 || addr > PT_MAX << 2) + if ((addr & 3) || regno > PT_MAX) break; - addr >>= 2; - - if (addr == PT_CCS) { + if (regno == PT_CCS) { /* don't allow the tracing process to change stuff like * interrupt enable, kernel/user bit, dma enables etc. */ data &= CCS_MASK; data |= get_reg(child, PT_CCS) & ~CCS_MASK; } - if (put_reg(child, addr, data)) + if (put_reg(child, regno, data)) break; ret = 0; break; From 81d3285c96dd90044ffe2c2c5d56405a5423d753 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:52 -0700 Subject: [PATCH 060/139] ptrace: cleanup arch_ptrace() on frv Use new 'regno', 'datap' variables in order to remove duplicated expressions and unnecessary castings. Alse remove checking @addr less than 0 because addr is now unsigned. Signed-off-by: Namhyung Kim Cc: David Howells Cc: "Daniel K." Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/ptrace.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index e9dbfad93589..9d68f7fac730 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -259,19 +259,21 @@ long arch_ptrace(struct task_struct *child, long request, { unsigned long tmp; int ret; + int regno = addr >> 2; + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { tmp = 0; ret = -EIO; - if ((addr & 3) || addr < 0) + if (addr & 3) break; ret = 0; - switch (addr >> 2) { + switch (regno) { case 0 ... PT__END - 1: - tmp = get_reg(child, addr >> 2); + tmp = get_reg(child, regno); break; case PT__END + 0: @@ -300,23 +302,18 @@ long arch_ptrace(struct task_struct *child, long request, } if (ret == 0) - ret = put_user(tmp, (unsigned long *) data); + ret = put_user(tmp, datap); break; } case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; - if ((addr & 3) || addr < 0) + if (addr & 3) break; - ret = 0; - switch (addr >> 2) { + switch (regno) { case 0 ... PT__END - 1: - ret = put_reg(child, addr >> 2, data); - break; - - default: - ret = -EIO; + ret = put_reg(child, regno, data); break; } break; @@ -325,25 +322,25 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_to_user(child, &user_frv_native_view, REGSET_GENERAL, 0, sizeof(child->thread.user->i), - (void __user *)data); + datap); case PTRACE_SETREGS: /* Set all integer regs in the child. */ return copy_regset_from_user(child, &user_frv_native_view, REGSET_GENERAL, 0, sizeof(child->thread.user->i), - (const void __user *)data); + datap); case PTRACE_GETFPREGS: /* Get the child FP/Media state. */ return copy_regset_to_user(child, &user_frv_native_view, REGSET_FPMEDIA, 0, sizeof(child->thread.user->f), - (void __user *)data); + datap); case PTRACE_SETFPREGS: /* Set the child FP/Media state. */ return copy_regset_from_user(child, &user_frv_native_view, REGSET_FPMEDIA, 0, sizeof(child->thread.user->f), - (const void __user *)data); + datap); default: ret = ptrace_request(child, request, addr, data); From 05fabdace41837a21f957507378b2954fe87ab61 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:53 -0700 Subject: [PATCH 061/139] ptrace: cleanup arch_ptrace() on h8300 Use new 'regno', 'datap' variables in order to remove duplicated expressions and unnecessary castings. Alse remove checking @addr less than 0 because addr is now unsigned. Signed-off-by: Namhyung Kim Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/h8300/kernel/ptrace.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index ef1aa0b8b20f..497fa89b5df4 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -54,24 +54,25 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + int regno = addr >> 2; + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long tmp = 0; - if ((addr & 3) || addr < 0 || addr >= sizeof(struct user)) { + if ((addr & 3) || addr >= sizeof(struct user)) { ret = -EIO; break ; } ret = 0; /* Default return condition */ - addr = addr >> 2; /* temporary hack. */ - if (addr < H8300_REGS_NO) - tmp = h8300_get_reg(child, addr); + if (regno < H8300_REGS_NO) + tmp = h8300_get_reg(child, regno); else { - switch(addr) { + switch (regno) { case 49: tmp = child->mm->start_code; break ; @@ -89,24 +90,23 @@ long arch_ptrace(struct task_struct *child, long request, } } if (!ret) - ret = put_user(tmp,(unsigned long *) data); + ret = put_user(tmp, datap); break ; } /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - if ((addr & 3) || addr < 0 || addr >= sizeof(struct user)) { + if ((addr & 3) || addr >= sizeof(struct user)) { ret = -EIO; break ; } - addr = addr >> 2; /* temporary hack. */ - if (addr == PT_ORIG_ER0) { + if (regno == PT_ORIG_ER0) { ret = -EIO; break ; } - if (addr < H8300_REGS_NO) { - ret = h8300_put_reg(child, addr, data); + if (regno < H8300_REGS_NO) { + ret = h8300_put_reg(child, regno, data); break ; } ret = -EIO; @@ -117,11 +117,11 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long tmp; for (i = 0; i < H8300_REGS_NO; i++) { tmp = h8300_get_reg(child, i); - if (put_user(tmp, (unsigned long *) data)) { + if (put_user(tmp, datap)) { ret = -EFAULT; break; } - data += sizeof(unsigned long); + datap++; } ret = 0; break; @@ -131,12 +131,12 @@ long arch_ptrace(struct task_struct *child, long request, int i; unsigned long tmp; for (i = 0; i < H8300_REGS_NO; i++) { - if (get_user(tmp, (unsigned long *) data)) { + if (get_user(tmp, datap)) { ret = -EFAULT; break; } h8300_put_reg(child, i, tmp); - data += sizeof(unsigned long); + datap++; } ret = 0; break; From a68caa035a0ffb895baa829edec0545f17d88c07 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:54 -0700 Subject: [PATCH 062/139] ptrace: cleanup arch_ptrace() on m32r Use new 'datap' variable in order to remove duplicated castings. Signed-off-by: Namhyung Kim Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/ptrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 5e00e0a41fff..20743754f2b2 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -626,6 +626,7 @@ arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { /* @@ -640,8 +641,7 @@ arch_ptrace(struct task_struct *child, long request, * read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: - ret = ptrace_read_user(child, addr, - (unsigned long __user *)data); + ret = ptrace_read_user(child, addr, datap); break; /* @@ -662,11 +662,11 @@ arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: - ret = ptrace_getregs(child, (void __user *)data); + ret = ptrace_getregs(child, datap); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, (void __user *)data); + ret = ptrace_setregs(child, datap); break; default: From add3d6e8b1372646f3cb8cb56d5840c91afc85e0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:55 -0700 Subject: [PATCH 063/139] ptrace: cleanup arch_ptrace() on m68k Use new 'regno', 'datap' variables in order to remove duplicated expressions and unnecessary castings. Signed-off-by: Namhyung Kim Acked-by: Geert Uytterhoeven Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m68k/kernel/ptrace.c | 46 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 583f59fcc363..0b252683cefb 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -161,51 +161,52 @@ long arch_ptrace(struct task_struct *child, long request, { unsigned long tmp; int i, ret = 0; + int regno = addr >> 2; /* temporary hack. */ + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: if (addr & 3) goto out_eio; - addr >>= 2; /* temporary hack. */ - if (addr >= 0 && addr < 19) { - tmp = get_reg(child, addr); - } else if (addr >= 21 && addr < 49) { - tmp = child->thread.fp[addr - 21]; + if (regno >= 0 && regno < 19) { + tmp = get_reg(child, regno); + } else if (regno >= 21 && regno < 49) { + tmp = child->thread.fp[regno - 21]; /* Convert internal fpu reg representation * into long double format */ - if (FPU_IS_EMU && (addr < 45) && !(addr % 3)) + if (FPU_IS_EMU && (regno < 45) && !(regno % 3)) tmp = ((tmp & 0xffff0000) << 15) | ((tmp & 0x0000ffff) << 16); } else goto out_eio; - ret = put_user(tmp, (unsigned long *)data); + ret = put_user(tmp, datap); break; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: + /* write the word at location addr in the USER area */ if (addr & 3) goto out_eio; - addr >>= 2; /* temporary hack. */ - if (addr == PT_SR) { + if (regno == PT_SR) { data &= SR_MASK; data |= get_reg(child, PT_SR) & ~SR_MASK; } - if (addr >= 0 && addr < 19) { - if (put_reg(child, addr, data)) + if (regno >= 0 && regno < 19) { + if (put_reg(child, regno, data)) goto out_eio; - } else if (addr >= 21 && addr < 48) { + } else if (regno >= 21 && regno < 48) { /* Convert long double format * into internal fpu reg representation */ - if (FPU_IS_EMU && (addr < 45) && !(addr % 3)) { + if (FPU_IS_EMU && (regno < 45) && !(regno % 3)) { data <<= 15; data = (data & 0xffff0000) | ((data & 0x0000ffff) >> 1); } - child->thread.fp[addr - 21] = data; + child->thread.fp[regno - 21] = data; } else goto out_eio; break; @@ -213,16 +214,16 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GETREGS: /* Get all gp regs from the child. */ for (i = 0; i < 19; i++) { tmp = get_reg(child, i); - ret = put_user(tmp, (unsigned long *)data); + ret = put_user(tmp, datap); if (ret) break; - data += sizeof(unsigned long); + datap++; } break; case PTRACE_SETREGS: /* Set all gp regs in the child. */ for (i = 0; i < 19; i++) { - ret = get_user(tmp, (unsigned long *)data); + ret = get_user(tmp, datap); if (ret) break; if (i == PT_SR) { @@ -230,25 +231,24 @@ long arch_ptrace(struct task_struct *child, long request, tmp |= get_reg(child, PT_SR) & ~SR_MASK; } put_reg(child, i, tmp); - data += sizeof(unsigned long); + datap++; } break; case PTRACE_GETFPREGS: /* Get the child FPU state. */ - if (copy_to_user((void *)data, &child->thread.fp, + if (copy_to_user(datap, &child->thread.fp, sizeof(struct user_m68kfp_struct))) ret = -EFAULT; break; case PTRACE_SETFPREGS: /* Set the child FPU state. */ - if (copy_from_user(&child->thread.fp, (void *)data, + if (copy_from_user(&child->thread.fp, datap, sizeof(struct user_m68kfp_struct))) ret = -EFAULT; break; case PTRACE_GET_THREAD_AREA: - ret = put_user(task_thread_info(child)->tp_value, - (unsigned long __user *)data); + ret = put_user(task_thread_info(child)->tp_value, datap); break; default: From bf60ef96072ebbf768bae1f9ab421ceef5b80676 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:56 -0700 Subject: [PATCH 064/139] ptrace: cleanup arch_ptrace() on m68knommu Use new 'regno', 'datap' variables in order to remove duplicated expressions and unnecessary castings. Alse remove checking @addr less than 0 because addr is now unsigned. Signed-off-by: Namhyung Kim Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m68knommu/kernel/ptrace.c | 54 ++++++++++++++++------------------ 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c index 7dbb08f5534e..6709fb707335 100644 --- a/arch/m68knommu/kernel/ptrace.c +++ b/arch/m68knommu/kernel/ptrace.c @@ -116,6 +116,8 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + int regno = addr >> 2; + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { /* read the word at location addr in the USER area. */ @@ -123,53 +125,48 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long tmp; ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) + if ((addr & 3) || addr > sizeof(struct user) - 3) break; tmp = 0; /* Default return condition */ - addr = addr >> 2; /* temporary hack. */ ret = -EIO; - if (addr < 19) { - tmp = get_reg(child, addr); - if (addr == PT_SR) + if (regno < 19) { + tmp = get_reg(child, regno); + if (regno == PT_SR) tmp >>= 16; - } else if (addr >= 21 && addr < 49) { - tmp = child->thread.fp[addr - 21]; - } else if (addr == 49) { + } else if (regno >= 21 && regno < 49) { + tmp = child->thread.fp[regno - 21]; + } else if (regno == 49) { tmp = child->mm->start_code; - } else if (addr == 50) { + } else if (regno == 50) { tmp = child->mm->start_data; - } else if (addr == 51) { + } else if (regno == 51) { tmp = child->mm->end_code; } else break; - ret = put_user(tmp,(unsigned long *) data); + ret = put_user(tmp, datap); break; } case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) + if ((addr & 3) || addr > sizeof(struct user) - 3) break; - addr = addr >> 2; /* temporary hack. */ - - if (addr == PT_SR) { + if (regno == PT_SR) { data &= SR_MASK; data <<= 16; data |= get_reg(child, PT_SR) & ~(SR_MASK << 16); } - if (addr < 19) { - if (put_reg(child, addr, data)) + if (regno < 19) { + if (put_reg(child, regno, data)) break; ret = 0; break; } - if (addr >= 21 && addr < 48) + if (regno >= 21 && regno < 48) { - child->thread.fp[addr - 21] = data; + child->thread.fp[regno - 21] = data; ret = 0; } break; @@ -181,11 +178,11 @@ long arch_ptrace(struct task_struct *child, long request, tmp = get_reg(child, i); if (i == PT_SR) tmp >>= 16; - if (put_user(tmp, (unsigned long *) data)) { + if (put_user(tmp, datap)) { ret = -EFAULT; break; } - data += sizeof(unsigned long); + datap++; } ret = 0; break; @@ -195,7 +192,7 @@ long arch_ptrace(struct task_struct *child, long request, int i; unsigned long tmp; for (i = 0; i < 19; i++) { - if (get_user(tmp, (unsigned long *) data)) { + if (get_user(tmp, datap)) { ret = -EFAULT; break; } @@ -205,7 +202,7 @@ long arch_ptrace(struct task_struct *child, long request, tmp |= get_reg(child, PT_SR) & ~(SR_MASK << 16); } put_reg(child, i, tmp); - data += sizeof(unsigned long); + datap++; } ret = 0; break; @@ -214,7 +211,7 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef PTRACE_GETFPREGS case PTRACE_GETFPREGS: { /* Get the child FPU state. */ ret = 0; - if (copy_to_user((void *)data, &child->thread.fp, + if (copy_to_user(datap, &child->thread.fp, sizeof(struct user_m68kfp_struct))) ret = -EFAULT; break; @@ -224,7 +221,7 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef PTRACE_SETFPREGS case PTRACE_SETFPREGS: { /* Set the child FPU state. */ ret = 0; - if (copy_from_user(&child->thread.fp, (void *)data, + if (copy_from_user(&child->thread.fp, datap, sizeof(struct user_m68kfp_struct))) ret = -EFAULT; break; @@ -232,8 +229,7 @@ long arch_ptrace(struct task_struct *child, long request, #endif case PTRACE_GET_THREAD_AREA: - ret = put_user(task_thread_info(child)->tp_value, - (unsigned long __user *)data); + ret = put_user(task_thread_info(child)->tp_value, datap); break; default: From cfd866f6bd1549fb25f826c469120a8eaee4fc1a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:57 -0700 Subject: [PATCH 065/139] ptrace: cleanup arch_ptrace() on microblaze Remove checking @addr greater than 0 because @addr is now unsigned. Signed-off-by: Namhyung Kim Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 3544bc157406..05ac8cc975d5 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -100,7 +100,7 @@ long arch_ptrace(struct task_struct *child, long request, } else { rval = -EIO; } - } else if (addr >= 0 && addr < PT_SIZE && (addr & 0x3) == 0) { + } else if (addr < PT_SIZE && (addr & 0x3) == 0) { microblaze_reg_t *reg_addr = reg_save_addr(addr, child); if (request == PTRACE_PEEKUSR) val = *reg_addr; From fb671139a27abc44303ef938c3811d910724c493 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:58 -0700 Subject: [PATCH 066/139] ptrace: cleanup arch_ptrace() on MIPS Use new 'addrp', 'datavp' and 'datalp' variables in order to remove unnecessary castings. Signed-off-by: Namhyung Kim Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/ptrace.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 95c3ae8b198c..d21c388c0116 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -259,6 +259,9 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + void __user *addrp = (void __user *) addr; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = (void __user *) data; switch (request) { /* when I and D space are separate, these will need to be fixed. */ @@ -387,7 +390,7 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; goto out; } - ret = put_user(tmp, (unsigned long __user *) data); + ret = put_user(tmp, datalp); break; } @@ -479,34 +482,31 @@ long arch_ptrace(struct task_struct *child, long request, } case PTRACE_GETREGS: - ret = ptrace_getregs(child, (__s64 __user *) data); + ret = ptrace_getregs(child, datavp); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, (__s64 __user *) data); + ret = ptrace_setregs(child, datavp); break; case PTRACE_GETFPREGS: - ret = ptrace_getfpregs(child, (__u32 __user *) data); + ret = ptrace_getfpregs(child, datavp); break; case PTRACE_SETFPREGS: - ret = ptrace_setfpregs(child, (__u32 __user *) data); + ret = ptrace_setfpregs(child, datavp); break; case PTRACE_GET_THREAD_AREA: - ret = put_user(task_thread_info(child)->tp_value, - (unsigned long __user *) data); + ret = put_user(task_thread_info(child)->tp_value, datalp); break; case PTRACE_GET_WATCH_REGS: - ret = ptrace_get_watch_regs(child, - (struct pt_watch_regs __user *) addr); + ret = ptrace_get_watch_regs(child, addrp); break; case PTRACE_SET_WATCH_REGS: - ret = ptrace_set_watch_regs(child, - (struct pt_watch_regs __user *) addr); + ret = ptrace_set_watch_regs(child, addrp); break; default: From 261bb92054b9e0835c176a27967dd9eb326d010c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:58 -0700 Subject: [PATCH 067/139] ptrace: cleanup arch_ptrace() on mn10300 Use new 'datap' variable in order to remove unnecessary castings. Also remove checking @addr less than 0 because @addr is now unsigned. Signed-off-by: Namhyung Kim Cc: David Howells Cc: Koichi Yasutake Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mn10300/kernel/ptrace.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index ec4b41439e99..5c0b07e61006 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -300,27 +300,26 @@ long arch_ptrace(struct task_struct *child, long request, { unsigned long tmp; int ret; + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) + if ((addr & 3) || addr > sizeof(struct user) - 3) break; tmp = 0; /* Default return condition */ if (addr < NR_PTREGS << 2) tmp = get_stack_long(child, ptrace_regid_to_frame[addr]); - ret = put_user(tmp, (unsigned long *) data); + ret = put_user(tmp, datap); break; /* write the word at location addr in the USER area */ case PTRACE_POKEUSR: ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) + if ((addr & 3) || addr > sizeof(struct user) - 3) break; ret = 0; @@ -333,25 +332,25 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_to_user(child, &user_mn10300_native_view, REGSET_GENERAL, 0, NR_PTREGS * sizeof(long), - (void __user *)data); + datap); case PTRACE_SETREGS: /* Set all integer regs in the child. */ return copy_regset_from_user(child, &user_mn10300_native_view, REGSET_GENERAL, 0, NR_PTREGS * sizeof(long), - (const void __user *)data); + datap); case PTRACE_GETFPREGS: /* Get the child FPU state. */ return copy_regset_to_user(child, &user_mn10300_native_view, REGSET_FPU, 0, sizeof(struct fpu_state_struct), - (void __user *)data); + datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ return copy_regset_from_user(child, &user_mn10300_native_view, REGSET_FPU, 0, sizeof(struct fpu_state_struct), - (const void __user *)data); + datap); default: ret = ptrace_request(child, request, addr, data); From 1cb4953c608d01d8ea522e2cf379e5e90f06d7a3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:59 -0700 Subject: [PATCH 068/139] ptrace: cleanup arch_ptrace() on parisc Add missing __user markup on the argument of put_user(). Signed-off-by: Namhyung Kim Cc: Kyle McMartin Cc: Helge Deller Cc: "James E.J. Bottomley" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 03920db4af45..2905b1f52d30 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -125,7 +125,7 @@ long arch_ptrace(struct task_struct *child, long request, addr >= sizeof(struct pt_regs)) break; tmp = *(unsigned long *) ((char *) task_regs(child) + addr); - ret = put_user(tmp, (unsigned long *) data); + ret = put_user(tmp, (unsigned long __user *) data); break; /* Write the word at location addr in the USER area. This will need From f68d2048206389603d646b06e3cc16f1bbc3ff88 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:01 -0700 Subject: [PATCH 069/139] ptrace: cleanup arch_ptrace() on powerpc Use new 'datavp' and 'datalp' variables in order to remove unnecessary castings. Signed-off-by: Namhyung Kim Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/ptrace.c | 51 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 136763568a7b..a9b32967cff6 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1409,26 +1409,28 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) static long arch_ptrace_old(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + void __user *datavp = (void __user *) data; + switch (request) { case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_GPR, 0, 32 * sizeof(long), - (void __user *) data); + datavp); case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_GPR, 0, 32 * sizeof(long), - (const void __user *) data); + datavp); case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_FPR, 0, 32 * sizeof(double), - (void __user *) data); + datavp); case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_FPR, 0, 32 * sizeof(double), - (const void __user *) data); + datavp); } return -EPERM; @@ -1438,6 +1440,8 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret = -EPERM; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; switch (request) { /* read the word at location addr in the USER area. */ @@ -1464,7 +1468,7 @@ long arch_ptrace(struct task_struct *child, long request, tmp = ((unsigned long *)child->thread.fpr) [TS_FPRWIDTH * (index - PT_FPR0)]; } - ret = put_user(tmp,(unsigned long __user *) data); + ret = put_user(tmp, datalp); break; } @@ -1526,11 +1530,11 @@ long arch_ptrace(struct task_struct *child, long request, dbginfo.features = 0; #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - if (!access_ok(VERIFY_WRITE, data, + if (!access_ok(VERIFY_WRITE, datavp, sizeof(struct ppc_debug_info))) return -EFAULT; - ret = __copy_to_user((struct ppc_debug_info __user *)data, - &dbginfo, sizeof(struct ppc_debug_info)) ? + ret = __copy_to_user(datavp, &dbginfo, + sizeof(struct ppc_debug_info)) ? -EFAULT : 0; break; } @@ -1538,11 +1542,10 @@ long arch_ptrace(struct task_struct *child, long request, case PPC_PTRACE_SETHWDEBUG: { struct ppc_hw_breakpoint bp_info; - if (!access_ok(VERIFY_READ, data, + if (!access_ok(VERIFY_READ, datavp, sizeof(struct ppc_hw_breakpoint))) return -EFAULT; - ret = __copy_from_user(&bp_info, - (struct ppc_hw_breakpoint __user *)data, + ret = __copy_from_user(&bp_info, datavp, sizeof(struct ppc_hw_breakpoint)) ? -EFAULT : 0; if (!ret) @@ -1561,11 +1564,9 @@ long arch_ptrace(struct task_struct *child, long request, if (addr > 0) break; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.dac1, - (unsigned long __user *)data); + ret = put_user(child->thread.dac1, datalp); #else - ret = put_user(child->thread.dabr, - (unsigned long __user *)data); + ret = put_user(child->thread.dabr, datalp); #endif break; } @@ -1581,7 +1582,7 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_to_user(child, &user_ppc_native_view, REGSET_GPR, 0, sizeof(struct pt_regs), - (void __user *) data); + datavp); #ifdef CONFIG_PPC64 case PTRACE_SETREGS64: @@ -1590,19 +1591,19 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_from_user(child, &user_ppc_native_view, REGSET_GPR, 0, sizeof(struct pt_regs), - (const void __user *) data); + datavp); case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_FPR, 0, sizeof(elf_fpregset_t), - (void __user *) data); + datavp); case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_FPR, 0, sizeof(elf_fpregset_t), - (const void __user *) data); + datavp); #ifdef CONFIG_ALTIVEC case PTRACE_GETVRREGS: @@ -1610,40 +1611,40 @@ long arch_ptrace(struct task_struct *child, long request, REGSET_VMX, 0, (33 * sizeof(vector128) + sizeof(u32)), - (void __user *) data); + datavp); case PTRACE_SETVRREGS: return copy_regset_from_user(child, &user_ppc_native_view, REGSET_VMX, 0, (33 * sizeof(vector128) + sizeof(u32)), - (const void __user *) data); + datavp); #endif #ifdef CONFIG_VSX case PTRACE_GETVSRREGS: return copy_regset_to_user(child, &user_ppc_native_view, REGSET_VSX, 0, 32 * sizeof(double), - (void __user *) data); + datavp); case PTRACE_SETVSRREGS: return copy_regset_from_user(child, &user_ppc_native_view, REGSET_VSX, 0, 32 * sizeof(double), - (const void __user *) data); + datavp); #endif #ifdef CONFIG_SPE case PTRACE_GETEVRREGS: /* Get the child spe register state. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_SPE, 0, 35 * sizeof(u32), - (void __user *) data); + datavp); case PTRACE_SETEVRREGS: /* Set the child spe register state. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_SPE, 0, 35 * sizeof(u32), - (const void __user *) data); + datavp); #endif /* Old reverse args ptrace callss */ From 41a2437eb1d175efe1958e283241a66a871dbdea Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:01 -0700 Subject: [PATCH 070/139] ptrace: cleanup arch_ptrace() on score Remove unnecessary castings. Signed-off-by: Namhyung Kim Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/score/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 894dcbf72099..55836188b217 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -336,14 +336,14 @@ arch_ptrace(struct task_struct *child, long request, ret = copy_regset_to_user(child, &user_score_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (void __user *)datap); + datap); break; case PTRACE_SETREGS: ret = copy_regset_from_user(child, &user_score_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (const void __user *)datap); + datap); break; default: From 9e1cb20619d8037248b6a776f777600c3584a8d5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:03 -0700 Subject: [PATCH 071/139] ptrace: cleanup arch_ptrace() on sh Remove unnecessary castings and get rid of dummy pointer in favor of offsetof() macro in ptrace_32.c. Also use temporary variables and break long lines in order to improve readability. Signed-off-by: Namhyung Kim Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/kernel/ptrace_32.c | 38 +++++++++++++++++++++----------------- arch/sh/kernel/ptrace_64.c | 19 ++++++++++++------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 34bf03745e86..90a15d29feeb 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -368,7 +368,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { - struct user * dummy = NULL; unsigned long __user *datap = (unsigned long __user *)data; int ret; @@ -384,17 +383,20 @@ long arch_ptrace(struct task_struct *child, long request, if (addr < sizeof(struct pt_regs)) tmp = get_stack_long(child, addr); - else if (addr >= (unsigned long) &dummy->fpu && - addr < (unsigned long) &dummy->u_fpvalid) { + else if (addr >= offsetof(struct user, fpu) && + addr < offsetof(struct user, u_fpvalid)) { if (!tsk_used_math(child)) { - if (addr == (unsigned long)&dummy->fpu.fpscr) + if (addr == offsetof(struct user, fpu.fpscr)) tmp = FPSCR_INIT; else tmp = 0; - } else + } else { + unsigned long index; + index = addr - offsetof(struct user, fpu); tmp = ((unsigned long *)child->thread.xstate) - [(addr - (long)&dummy->fpu) >> 2]; - } else if (addr == (unsigned long) &dummy->u_fpvalid) + [index >> 2]; + } + } else if (addr == offsetof(struct user, u_fpvalid)) tmp = !!tsk_used_math(child); else if (addr == PT_TEXT_ADDR) tmp = child->mm->start_code; @@ -418,13 +420,15 @@ long arch_ptrace(struct task_struct *child, long request, if (addr < sizeof(struct pt_regs)) ret = put_stack_long(child, addr, data); - else if (addr >= (unsigned long) &dummy->fpu && - addr < (unsigned long) &dummy->u_fpvalid) { + else if (addr >= offsetof(struct user, fpu) && + addr < offsetof(struct user, u_fpvalid)) { + unsigned long index; + index = addr - offsetof(struct user, fpu); set_stopped_child_used_math(child); ((unsigned long *)child->thread.xstate) - [(addr - (long)&dummy->fpu) >> 2] = data; + [index >> 2] = data; ret = 0; - } else if (addr == (unsigned long) &dummy->u_fpvalid) { + } else if (addr == offsetof(struct user, u_fpvalid)) { conditional_stopped_child_used_math(data, child); ret = 0; } @@ -434,35 +438,35 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_to_user(child, &user_sh_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (void __user *)data); + datap); case PTRACE_SETREGS: return copy_regset_from_user(child, &user_sh_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (const void __user *)data); + datap); #ifdef CONFIG_SH_FPU case PTRACE_GETFPREGS: return copy_regset_to_user(child, &user_sh_native_view, REGSET_FPU, 0, sizeof(struct user_fpu_struct), - (void __user *)data); + datap); case PTRACE_SETFPREGS: return copy_regset_from_user(child, &user_sh_native_view, REGSET_FPU, 0, sizeof(struct user_fpu_struct), - (const void __user *)data); + datap); #endif #ifdef CONFIG_SH_DSP case PTRACE_GETDSPREGS: return copy_regset_to_user(child, &user_sh_native_view, REGSET_DSP, 0, sizeof(struct pt_dspregs), - (void __user *)data); + datap); case PTRACE_SETDSPREGS: return copy_regset_from_user(child, &user_sh_native_view, REGSET_DSP, 0, sizeof(struct pt_dspregs), - (const void __user *)data); + datap); #endif default: ret = ptrace_request(child, request, addr, data); diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index 4840716c196a..4436eacddb15 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -387,6 +387,7 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + unsigned long __user *datap = (unsigned long __user *) data; switch (request) { /* read the word at location addr in the USER area. */ @@ -401,13 +402,15 @@ long arch_ptrace(struct task_struct *child, long request, tmp = get_stack_long(child, addr); else if ((addr >= offsetof(struct user, fpu)) && (addr < offsetof(struct user, u_fpvalid))) { - tmp = get_fpu_long(child, addr - offsetof(struct user, fpu)); + unsigned long index; + index = addr - offsetof(struct user, fpu); + tmp = get_fpu_long(child, index); } else if (addr == offsetof(struct user, u_fpvalid)) { tmp = !!tsk_used_math(child); } else { break; } - ret = put_user(tmp, (unsigned long *)data); + ret = put_user(tmp, datap); break; } @@ -438,7 +441,9 @@ long arch_ptrace(struct task_struct *child, long request, } else if ((addr >= offsetof(struct user, fpu)) && (addr < offsetof(struct user, u_fpvalid))) { - ret = put_fpu_long(child, addr - offsetof(struct user, fpu), data); + unsigned long index; + index = addr - offsetof(struct user, fpu); + ret = put_fpu_long(child, index, data); } break; @@ -446,23 +451,23 @@ long arch_ptrace(struct task_struct *child, long request, return copy_regset_to_user(child, &user_sh64_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (void __user *)data); + datap); case PTRACE_SETREGS: return copy_regset_from_user(child, &user_sh64_native_view, REGSET_GENERAL, 0, sizeof(struct pt_regs), - (const void __user *)data); + datap); #ifdef CONFIG_SH_FPU case PTRACE_GETFPREGS: return copy_regset_to_user(child, &user_sh64_native_view, REGSET_FPU, 0, sizeof(struct user_fpu_struct), - (void __user *)data); + datap); case PTRACE_SETFPREGS: return copy_regset_from_user(child, &user_sh64_native_view, REGSET_FPU, 0, sizeof(struct user_fpu_struct), - (const void __user *)data); + datap); #endif default: ret = ptrace_request(child, request, addr, data); From a9384e23ab19eba0dedb861ebcba805b98a3b7a5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:03 -0700 Subject: [PATCH 072/139] ptrace: cleanup arch_ptrace() on sparc Factor out struct fps and remove redundant castings. Signed-off-by: Namhyung Kim Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/kernel/ptrace_32.c | 54 +++++++++++++---------------------- arch/sparc/kernel/ptrace_64.c | 8 +++--- 2 files changed, 24 insertions(+), 38 deletions(-) diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index e08ba4a46acd..27b9e93d0121 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -323,19 +323,35 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sparc32_view; } +struct fps { + unsigned long regs[32]; + unsigned long fsr; + unsigned long flags; + unsigned long extra; + unsigned long fpqd; + struct fq { + unsigned long *insnaddr; + unsigned long insn; + } fpq[16]; +}; + long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { unsigned long addr2 = current->thread.kregs->u_regs[UREG_I4]; + void __user *addr2p; const struct user_regset_view *view; + struct pt_regs __user *pregs; + struct fps __user *fps; int ret; view = task_user_regset_view(current); + addr2p = (void __user *) addr2; + pregs = (struct pt_regs __user *) addr; + fps = (struct fps __user *) addr; switch(request) { case PTRACE_GETREGS: { - struct pt_regs __user *pregs = (struct pt_regs __user *) addr; - ret = copy_regset_to_user(child, view, REGSET_GENERAL, 32 * sizeof(u32), 4 * sizeof(u32), @@ -349,8 +365,6 @@ long arch_ptrace(struct task_struct *child, long request, } case PTRACE_SETREGS: { - struct pt_regs __user *pregs = (struct pt_regs __user *) addr; - ret = copy_regset_from_user(child, view, REGSET_GENERAL, 32 * sizeof(u32), 4 * sizeof(u32), @@ -364,19 +378,6 @@ long arch_ptrace(struct task_struct *child, long request, } case PTRACE_GETFPREGS: { - struct fps { - unsigned long regs[32]; - unsigned long fsr; - unsigned long flags; - unsigned long extra; - unsigned long fpqd; - struct fq { - unsigned long *insnaddr; - unsigned long insn; - } fpq[16]; - }; - struct fps __user *fps = (struct fps __user *) addr; - ret = copy_regset_to_user(child, view, REGSET_FP, 0 * sizeof(u32), 32 * sizeof(u32), @@ -398,19 +399,6 @@ long arch_ptrace(struct task_struct *child, long request, } case PTRACE_SETFPREGS: { - struct fps { - unsigned long regs[32]; - unsigned long fsr; - unsigned long flags; - unsigned long extra; - unsigned long fpqd; - struct fq { - unsigned long *insnaddr; - unsigned long insn; - } fpq[16]; - }; - struct fps __user *fps = (struct fps __user *) addr; - ret = copy_regset_from_user(child, view, REGSET_FP, 0 * sizeof(u32), 32 * sizeof(u32), @@ -425,8 +413,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_READTEXT: case PTRACE_READDATA: - ret = ptrace_readdata(child, addr, - (void __user *) addr2, data); + ret = ptrace_readdata(child, addr, addr2p, data); if (ret == data) ret = 0; @@ -436,8 +423,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_WRITETEXT: case PTRACE_WRITEDATA: - ret = ptrace_writedata(child, (void __user *) addr2, - addr, data); + ret = ptrace_writedata(child, addr2p, addr, data); if (ret == data) ret = 0; diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index d9db5a4dfef9..9ccc812bc09e 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -976,10 +976,12 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr2 = task_pt_regs(current)->u_regs[UREG_I4]; struct pt_regs __user *pregs; struct fps __user *fps; + void __user *addr2p; int ret; pregs = (struct pt_regs __user *) addr; fps = (struct fps __user *) addr; + addr2p = (void __user *) addr2; switch (request) { case PTRACE_PEEKUSR: @@ -1030,8 +1032,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_READTEXT: case PTRACE_READDATA: - ret = ptrace_readdata(child, addr, - (char __user *)addr2, data); + ret = ptrace_readdata(child, addr, addr2p, data); if (ret == data) ret = 0; else if (ret >= 0) @@ -1040,8 +1041,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_WRITETEXT: case PTRACE_WRITEDATA: - ret = ptrace_writedata(child, (char __user *) addr2, - addr, data); + ret = ptrace_writedata(child, addr2p, addr, data); if (ret == data) ret = 0; else if (ret >= 0) From 8c0acac3676103113a2e259291a07c073ac07879 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:04 -0700 Subject: [PATCH 073/139] ptrace: cleanup arch_ptrace() on tile Remove checking @addr less than 0 because @addr is now unsigned. Signed-off-by: Namhyung Kim Acked-by: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index f634729211d1..9cd29884c09f 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -58,7 +58,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. */ - if (addr < 0 || addr >= PTREGS_SIZE) + if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; #ifdef CONFIG_COMPAT @@ -77,7 +77,7 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_POKEUSR: /* Write register in pt_regs. */ - if (addr < 0 || addr >= PTREGS_SIZE) + if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; #ifdef CONFIG_COMPAT From 0a3d763f1a68c97daae57e40c6d698986d1b38f4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:04 -0700 Subject: [PATCH 074/139] ptrace: cleanup arch_ptrace() on um Remove unnecessary castings using void pointer and fix copy_to_user() return value. Also add missing __user markup on the argument of arch_ptrctl(). Signed-off-by: Namhyung Kim Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/ptrace.c | 18 ++++++++---------- arch/um/sys-x86_64/ptrace.c | 7 +++---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 963d82bdec06..a5e33f29bbeb 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -47,6 +47,7 @@ long arch_ptrace(struct task_struct *child, long request, { int i, ret; unsigned long __user *p = (void __user *)data; + void __user *vp = p; switch (request) { /* read word at location addr. */ @@ -108,24 +109,20 @@ long arch_ptrace(struct task_struct *child, long request, #endif #ifdef PTRACE_GETFPREGS case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs((struct user_i387_struct __user *) data, - child); + ret = get_fpregs(vp, child); break; #endif #ifdef PTRACE_SETFPREGS case PTRACE_SETFPREGS: /* Set the child FPU state. */ - ret = set_fpregs((struct user_i387_struct __user *) data, - child); + ret = set_fpregs(vp, child); break; #endif case PTRACE_GET_THREAD_AREA: - ret = ptrace_get_thread_area(child, addr, - (struct user_desc __user *) data); + ret = ptrace_get_thread_area(child, addr, vp); break; case PTRACE_SET_THREAD_AREA: - ret = ptrace_set_thread_area(child, addr, - (struct user_desc __user *) data); + ret = ptrace_set_thread_area(child, addr, datavp); break; case PTRACE_FAULTINFO: { @@ -135,7 +132,8 @@ long arch_ptrace(struct task_struct *child, long request, * On i386, ptrace_faultinfo is smaller! */ ret = copy_to_user(p, &child->thread.arch.faultinfo, - sizeof(struct ptrace_faultinfo)); + sizeof(struct ptrace_faultinfo)) ? + -EIO : 0; break; } @@ -159,7 +157,7 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef PTRACE_ARCH_PRCTL case PTRACE_ARCH_PRCTL: /* XXX Calls ptrace on the host - needs some SMP thinking */ - ret = arch_prctl(child, data, (void *) addr); + ret = arch_prctl(child, data, (void __user *) addr); break; #endif default: diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index 67e63680df28..f43613643cdb 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -179,15 +179,14 @@ long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret = -EIO; + void __user *datap = (void __user *) data; switch (request) { case PTRACE_GETFPXREGS: /* Get the child FPU state. */ - ret = get_fpregs((struct user_i387_struct __user *) data, - child); + ret = get_fpregs(datap, child); break; case PTRACE_SETFPXREGS: /* Set the child FPU state. */ - ret = set_fpregs((struct user_i387_struct __user *) data, - child); + ret = set_fpregs(datap, child); break; } From 5ef45079dd9c8f2e9c7aa788dc3121835ae52863 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:06 -0700 Subject: [PATCH 075/139] ptrace: cleanup arch_ptrace() on xtensa Use new 'datap' variable in order to remove unnecessary castings. Signed-off-by: Namhyung Kim Cc: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/kernel/ptrace.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index af9ba80f254c..c72c9473ef99 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -260,6 +260,7 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret = -EPERM; + void __user *datap = (void __user *) data; switch (request) { case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -268,7 +269,7 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_PEEKUSR: /* read register specified by addr. */ - ret = ptrace_peekusr(child, addr, (void __user *) data); + ret = ptrace_peekusr(child, addr, datap); break; case PTRACE_POKETEXT: /* write the word at location addr. */ @@ -281,19 +282,19 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: - ret = ptrace_getregs(child, (void __user *) data); + ret = ptrace_getregs(child, datap); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, (void __user *) data); + ret = ptrace_setregs(child, datap); break; case PTRACE_GETXTREGS: - ret = ptrace_getxregs(child, (void __user *) data); + ret = ptrace_getxregs(child, datap); break; case PTRACE_SETXTREGS: - ret = ptrace_setxregs(child, (void __user *) data); + ret = ptrace_setxregs(child, datap); break; default: From b8ed374e202e23caaf9bd77dcadc9de6447faaa8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:06 -0700 Subject: [PATCH 076/139] signals: annotate lock_task_sighand() lock_task_sighand() grabs sighand->siglock in case of returning non-NULL but unlock_task_sighand() releases it unconditionally. This leads sparse to complain about the lock context imbalance. Rename and wrap lock_task_sighand() using __cond_lock() macro to make sparse happy. Suggested-by: Eric Dumazet Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 ++++++++- kernel/signal.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 393ce94e54b7..3ff5c8519abd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2236,9 +2236,16 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags); +#define lock_task_sighand(tsk, flags) \ +({ struct sighand_struct *__ss; \ + __cond_lock(&(tsk)->sighand->siglock, \ + (__ss = __lock_task_sighand(tsk, flags))); \ + __ss; \ +}) \ + static inline void unlock_task_sighand(struct task_struct *tsk, unsigned long *flags) { diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b7..e921409b85a9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) return count; } -struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) +struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) { struct sighand_struct *sighand; From b84011508360d6885a9d95a235ec77d56f133377 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:07 -0700 Subject: [PATCH 077/139] signals: annotate lock context change on ptrace_stop() ptrace_stop() releases and regrabs current->sighand->siglock but was missing proper annotation. Add it. Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Cc: Ingo Molnar Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index e921409b85a9..4e3cff10fdce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1618,6 +1618,8 @@ static int sigkill_pending(struct task_struct *tsk) * is gone, we keep current->exit_code unless clear_code. */ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) + __releases(¤t->sighand->siglock) + __acquires(¤t->sighand->siglock) { if (arch_ptrace_stop_needed(exit_code, info)) { /* From 9b1bf12d5d51bca178dea21b04a0805e29d60cf1 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 27 Oct 2010 15:34:08 -0700 Subject: [PATCH 078/139] signals: move cred_guard_mutex from task_struct to signal_struct Oleg Nesterov pointed out we have to prevent multiple-threads-inside-exec itself and we can reuse ->cred_guard_mutex for it. Yes, concurrent execve() has no worth. Let's move ->cred_guard_mutex from task_struct to signal_struct. It naturally prevent multiple-threads-inside-exec. Signed-off-by: KOSAKI Motohiro Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- fs/proc/base.c | 8 ++++---- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 7 ++++--- include/linux/tracehook.h | 2 +- kernel/cred.c | 4 +--- kernel/fork.c | 2 ++ kernel/ptrace.c | 4 ++-- 8 files changed, 21 insertions(+), 20 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 3aa75b8888a1..9722909c4d88 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1083,14 +1083,14 @@ EXPORT_SYMBOL(setup_new_exec); */ int prepare_bprm_creds(struct linux_binprm *bprm) { - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); return -ENOMEM; } @@ -1098,7 +1098,7 @@ void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } kfree(bprm); @@ -1119,13 +1119,13 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. */ security_bprm_committed_creds(bprm); - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_guard_mutex to protect against + * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b094c1c8465..f3d02ca461ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; - if (mutex_lock_killable(&task->cred_guard_mutex)) + if (mutex_lock_killable(&task->signal->cred_guard_mutex)) return NULL; mm = get_task_mm(task); @@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) mmput(mm); mm = NULL; } - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); return mm; } @@ -2354,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out_free; /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->cred_guard_mutex); + length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); if (length < 0) goto out_free; length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6ba..1f8c06ce0fa6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -29,6 +29,8 @@ extern struct fs_struct init_fs; .running = 0, \ .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ }, \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } extern struct nsproxy init_nsproxy; @@ -145,8 +147,6 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .cred_guard_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ff5c8519abd..be7adb7588e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -626,6 +626,10 @@ struct signal_struct { int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ }; /* Context switch must be unlocked if interrupts are to be enabled */ @@ -1305,9 +1309,6 @@ struct task_struct { * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 10db0102a890..3a2e66d88a32 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -150,7 +150,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) * * Return %LSM_UNSAFE_* bits applied to an exec because of tracing. * - * @task->cred_guard_mutex is held by the caller through the do_execve(). + * @task->signal->cred_guard_mutex is held by the caller through the do_execve(). */ static inline int tracehook_unsafe_exec(struct task_struct *task) { diff --git a/kernel/cred.c b/kernel/cred.c index 9a3e22641fe7..6a1aa004e376 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); /* * Prepare credentials for current to perform an execve() - * - The caller must hold current->cred_guard_mutex + * - The caller must hold ->cred_guard_mutex */ struct cred *prepare_exec_creds(void) { @@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - mutex_init(&p->cred_guard_mutex); - if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/fork.c b/kernel/fork.c index e87aaaaf5131..3b159c5991b7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -908,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; + mutex_init(&sig->cred_guard_mutex); + return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ea7ce0215cd1..99bbaa3e5b0d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) * under ptrace. */ retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->cred_guard_mutex)) + if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); @@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out: return retval; } From 1b0d300bd0f047e2edaf9d4b6784189e6c67c3d1 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Wed, 27 Oct 2010 15:34:08 -0700 Subject: [PATCH 079/139] core_pattern: fix truncation by core_pattern handler with long parameters We met a parameter truncated issue, consider following: > echo "|/root/core_pattern_pipe_test %p /usr/libexec/blah-blah-blah \ %s %c %p %u %g 11 12345678901234567890123456789012345678 %t" > \ /proc/sys/kernel/core_pattern This is okay because the strings is less than CORENAME_MAX_SIZE. "cat /proc/sys/kernel/core_pattern" shows the whole string. but after we run core_pattern_pipe_test in man page, we found last parameter was truncated like below: argc[10]=<12807486> The root cause is core_pattern allows % specifiers, which need to be replaced during parse time, but the replace may expand the strings to larger than CORENAME_MAX_SIZE. So if the last parameter is % specifiers, the replace code is using snprintf(out_ptr, out_end - out_ptr, ...), this will write out of corename array. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Xiaotian Feng Cc: Alexander Viro Cc: Oleg Nesterov Cc: KOSAKI Motohiro Reviewed-by: Neil Horman Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 155 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 60 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 9722909c4d88..ca01d2d0a6d4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -66,6 +66,12 @@ char core_pattern[CORENAME_MAX_SIZE] = "core"; unsigned int core_pipe_limit; int suid_dumpable = 0; +struct core_name { + char *corename; + int used, size; +}; +static atomic_t call_count = ATOMIC_INIT(1); + /* The maximal length of core_pattern is also specified in sysctl.c */ static LIST_HEAD(formats); @@ -1459,127 +1465,148 @@ void set_binfmt(struct linux_binfmt *new) EXPORT_SYMBOL(set_binfmt); +static int expand_corename(struct core_name *cn) +{ + char *old_corename = cn->corename; + + cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); + cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); + + if (!cn->corename) { + kfree(old_corename); + return -ENOMEM; + } + + return 0; +} + +static int cn_printf(struct core_name *cn, const char *fmt, ...) +{ + char *cur; + int need; + int ret; + va_list arg; + + va_start(arg, fmt); + need = vsnprintf(NULL, 0, fmt, arg); + va_end(arg); + + if (likely(need < cn->size - cn->used - 1)) + goto out_printf; + + ret = expand_corename(cn); + if (ret) + goto expand_fail; + +out_printf: + cur = cn->corename + cn->used; + va_start(arg, fmt); + vsnprintf(cur, need + 1, fmt, arg); + va_end(arg); + cn->used += need; + return 0; + +expand_fail: + return ret; +} + /* format_corename will inspect the pattern parameter, and output a * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(char *corename, long signr) +static int format_corename(struct core_name *cn, long signr) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); - char *out_ptr = corename; - char *const out_end = corename + CORENAME_MAX_SIZE; - int rc; int pid_in_pattern = 0; + int err = 0; + + cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); + cn->corename = kmalloc(cn->size, GFP_KERNEL); + cn->used = 0; + + if (!cn->corename) + return -ENOMEM; /* Repeat as long as we have more pattern to process and more output space */ while (*pat_ptr) { if (*pat_ptr != '%') { - if (out_ptr == out_end) + if (*pat_ptr == 0) goto out; - *out_ptr++ = *pat_ptr++; + err = cn_printf(cn, "%c", *pat_ptr++); } else { switch (*++pat_ptr) { + /* single % at the end, drop that */ case 0: goto out; /* Double percent, output one percent */ case '%': - if (out_ptr == out_end) - goto out; - *out_ptr++ = '%'; + err = cn_printf(cn, "%c", '%'); break; /* pid */ case 'p': pid_in_pattern = 1; - rc = snprintf(out_ptr, out_end - out_ptr, - "%d", task_tgid_vnr(current)); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%d", + task_tgid_vnr(current)); break; /* uid */ case 'u': - rc = snprintf(out_ptr, out_end - out_ptr, - "%d", cred->uid); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%d", cred->uid); break; /* gid */ case 'g': - rc = snprintf(out_ptr, out_end - out_ptr, - "%d", cred->gid); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%d", cred->gid); break; /* signal that caused the coredump */ case 's': - rc = snprintf(out_ptr, out_end - out_ptr, - "%ld", signr); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%ld", signr); break; /* UNIX time of coredump */ case 't': { struct timeval tv; do_gettimeofday(&tv); - rc = snprintf(out_ptr, out_end - out_ptr, - "%lu", tv.tv_sec); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%lu", tv.tv_sec); break; } /* hostname */ case 'h': down_read(&uts_sem); - rc = snprintf(out_ptr, out_end - out_ptr, - "%s", utsname()->nodename); + err = cn_printf(cn, "%s", + utsname()->nodename); up_read(&uts_sem); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; break; /* executable */ case 'e': - rc = snprintf(out_ptr, out_end - out_ptr, - "%s", current->comm); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%s", current->comm); break; /* core limit size */ case 'c': - rc = snprintf(out_ptr, out_end - out_ptr, - "%lu", rlimit(RLIMIT_CORE)); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, "%lu", + rlimit(RLIMIT_CORE)); break; default: break; } ++pat_ptr; } + + if (err) + return err; } + /* Backward compatibility with core_uses_pid: * * If core_pattern does not include a %p (as is the default) * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. */ if (!ispipe && !pid_in_pattern && core_uses_pid) { - rc = snprintf(out_ptr, out_end - out_ptr, - ".%d", task_tgid_vnr(current)); - if (rc > out_end - out_ptr) - goto out; - out_ptr += rc; + err = cn_printf(cn, ".%d", task_tgid_vnr(current)); + if (err) + return err; } out: - *out_ptr = 0; return ispipe; } @@ -1856,7 +1883,7 @@ static int umh_pipe_setup(struct subprocess_info *info) void do_coredump(long signr, int exit_code, struct pt_regs *regs) { struct core_state core_state; - char corename[CORENAME_MAX_SIZE + 1]; + struct core_name cn; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; @@ -1911,7 +1938,13 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) */ clear_thread_flag(TIF_SIGPENDING); - ispipe = format_corename(corename, signr); + ispipe = format_corename(&cn, signr); + + if (ispipe == -ENOMEM) { + printk(KERN_WARNING "format_corename failed\n"); + printk(KERN_WARNING "Aborting core\n"); + goto fail_corename; + } if (ispipe) { int dump_count; @@ -1948,7 +1981,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) goto fail_dropcount; } - helper_argv = argv_split(GFP_KERNEL, corename+1, NULL); + helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); @@ -1961,7 +1994,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) argv_free(helper_argv); if (retval) { printk(KERN_INFO "Core dump to %s pipe failed\n", - corename); + cn.corename); goto close_fail; } } else { @@ -1970,7 +2003,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; - cprm.file = filp_open(corename, + cprm.file = filp_open(cn.corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); if (IS_ERR(cprm.file)) @@ -2012,6 +2045,8 @@ fail_dropcount: if (ispipe) atomic_dec(&core_dump_count); fail_unlock: + kfree(cn.corename); +fail_corename: coredump_finish(mm); revert_creds(old_cred); fail_creds: From 895021552d6ffe8a4d076cb5c4b1e700c33e96e1 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 27 Oct 2010 15:34:09 -0700 Subject: [PATCH 080/139] coredump: default CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y The userland ELF tools have been coping with partial-segments core files for a few years now. Multiple distro builds are now setting this option. It behooves everyone who ever deals with core files to have more info dumped in there, especially as more and more people's compilers are producing build IDs. Make it the default. Anyone using older tools confused by these core files can configure this option off, or just change /proc/PID/coredump_filter after boot. Signed-off-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig.binfmt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index bb4cc5b8abc8..79e2ca7973b7 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC config CORE_DUMP_DEFAULT_ELF_HEADERS bool "Write ELF core dumps with partial segments" - default n + default y depends on BINFMT_ELF && ELF_CORE help ELF core dump files describe each memory mapping of the crashed @@ -60,7 +60,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS inherited. See Documentation/filesystems/proc.txt for details. This config option changes the default setting of coredump_filter - seen at boot time. If unsure, say N. + seen at boot time. If unsure, say Y. config BINFMT_FLAT bool "Kernel support for flat binaries" From d16e15f5b029fc7d03540ba0e5fb23b0abb0ebe0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:10 -0700 Subject: [PATCH 081/139] exit: add lock context annotation on find_new_reaper() find_new_reaper() releases and regrabs tasklist_lock but was missing proper annotations. Add it. This remove following sparse warning: warning: context imbalance in 'find_new_reaper' - unexpected unlock Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index 894179a32ec1..b194febf5799 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -703,6 +703,8 @@ static void exit_mm(struct task_struct * tsk) * space. */ static struct task_struct *find_new_reaper(struct task_struct *father) + __releases(&tasklist_lock) + __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *thread; From b40d4f84becd69275451baee7f0801c85eb58437 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Wed, 27 Oct 2010 15:34:10 -0700 Subject: [PATCH 082/139] /proc/pid/smaps: export amount of anonymous memory in a mapping Export the number of anonymous pages in a mapping via smaps. Even the private pages in a mapping backed by a file, would be marked as anonymous, when they are modified. Export this information to user-space via smaps. Exporting this count will help gdb to make a better decision on which areas need to be dumped in its coredump; and should be useful to others studying the memory usage of a process. Signed-off-by: Nikanth Karthikesan Acked-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 13 ++++++++++--- fs/proc/task_mmu.c | 6 ++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a563b74c7aef..976de6e19dd8 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -370,6 +370,7 @@ Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 892 kB +Anonymous: 0 kB Swap: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB @@ -378,9 +379,15 @@ The first of these lines shows the same information as is displayed for the mapping in /proc/PID/maps. The remaining lines show the size of the mapping (size), the amount of the mapping that is currently resident in RAM (RSS), the process' proportional share of this mapping (PSS), the number of clean and -dirty shared pages in the mapping, and the number of clean and dirty private -pages in the mapping. The "Referenced" indicates the amount of memory -currently marked as referenced or accessed. +dirty private pages in the mapping. Note that even a page which is part of a +MAP_SHARED mapping, but has only a single pte mapped, i.e. is currently used +by only one process, is accounted as private and not as shared. "Referenced" +indicates the amount of memory currently marked as referenced or accessed. +"Anonymous" shows the amount of memory that does not belong to any file. Even +a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE +and a page is modified, the file page is replaced by a private anonymous copy. +"Swap" shows how much would-be-anonymous memory is also used, but out on +swap. This file is only present if the CONFIG_MMU kernel configuration option is enabled. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 871e25ed0069..da6b01d70f01 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -327,6 +327,7 @@ struct mem_size_stats { unsigned long private_clean; unsigned long private_dirty; unsigned long referenced; + unsigned long anonymous; unsigned long swap; u64 pss; }; @@ -357,6 +358,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (!page) continue; + if (PageAnon(page)) + mss->anonymous += PAGE_SIZE; + mss->resident += PAGE_SIZE; /* Accumulate the size in pages that have been accessed. */ if (pte_young(ptent) || PageReferenced(page)) @@ -410,6 +414,7 @@ static int show_smap(struct seq_file *m, void *v) "Private_Clean: %8lu kB\n" "Private_Dirty: %8lu kB\n" "Referenced: %8lu kB\n" + "Anonymous: %8lu kB\n" "Swap: %8lu kB\n" "KernelPageSize: %8lu kB\n" "MMUPageSize: %8lu kB\n", @@ -421,6 +426,7 @@ static int show_smap(struct seq_file *m, void *v) mss.private_clean >> 10, mss.private_dirty >> 10, mss.referenced >> 10, + mss.anonymous >> 10, mss.swap >> 10, vma_kernel_pagesize(vma) >> 10, vma_mmu_pagesize(vma) >> 10); From 03f890f8c2f5c9008d3d8f6d85267717ced4bd79 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Wed, 27 Oct 2010 15:34:11 -0700 Subject: [PATCH 083/139] /proc/pid/pagemap: document in Documentation/filesystems/proc.txt Document /proc/pid/pagemap in Documentation/filesystems/proc.txt Signed-off-by: Nikanth Karthikesan Cc: Richard Guenther Cc: Balbir Singh Cc: KOSAKI Motohiro Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 976de6e19dd8..e73df2722ff3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -136,6 +136,7 @@ Table 1-1: Process specific entries in /proc statm Process memory status information status Process status in human readable form wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + pagemap Page table stack Report full stack trace, enable via CONFIG_STACKTRACE smaps a extension based on maps, showing the memory consumption of each mapping @@ -404,6 +405,9 @@ To clear the bits for the file mapped pages associated with the process > echo 3 > /proc/PID/clear_refs Any other value written to /proc/PID/clear_refs will have no effect. +The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags +using /proc/kpageflags and number of times a page is mapped using +/proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. 1.2 Kernel data --------------- From 19cd56c48da58bebc3a638e036bcab69469acd27 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 27 Oct 2010 15:34:12 -0700 Subject: [PATCH 084/139] procfs: fix /proc/softirqs formatting The length of the BLOCK_IPOLL string is making i's value be printed too far to the right. This patch fixes this and makes the output a bit neater. Currently: CPU0 HI: 0 TIMER: 599792 NET_TX: 2 NET_RX: 6 BLOCK: 80807 BLOCK_IOPOLL: 0 TASKLET: 20012 SCHED: 0 HRTIMER: 63 RCU: 619279 With patch: CPU0 HI: 0 TIMER: 585582 NET_TX: 2 NET_RX: 6 BLOCK: 80320 BLOCK_IOPOLL: 0 TASKLET: 19287 SCHED: 0 HRTIMER: 62 RCU: 604441 Signed-off-by: Davidlohr Bueso Acked-by: Keika Kobayashi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/softirqs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 1807c2419f17..37994737c983 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -10,13 +10,13 @@ static int show_softirqs(struct seq_file *p, void *v) { int i, j; - seq_printf(p, " "); + seq_printf(p, " "); for_each_possible_cpu(i) seq_printf(p, "CPU%-8d", i); seq_printf(p, "\n"); for (i = 0; i < NR_SOFTIRQS; i++) { - seq_printf(p, "%8s:", softirq_to_name[i]); + seq_printf(p, "%12s:", softirq_to_name[i]); for_each_possible_cpu(j) seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); seq_printf(p, "\n"); From f2c66cd8eeddedb440f33bc0f5cec1ed7ae376cb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:34:13 -0700 Subject: [PATCH 085/139] /proc/stat: scalability of irq num per cpu /proc/stat shows the total number of all interrupts to each cpu. But when the number of IRQs are very large, it take very long time and 'cat /proc/stat' takes more than 10 secs. This is because sum of all irq events are counted when /proc/stat is read. This patch adds "sum of all irq" counter percpu and reduce read costs. The cost of reading /proc/stat is important because it's used by major applications as 'top', 'ps', 'w', etc.... A test on a mechin (4096cpu, 256 nodes, 4592 irqs) shows %time cat /proc/stat > /dev/null Before Patch: 12.627 sec After Patch: 2.459 sec Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Jack Steiner Acked-by: Jack Steiner Cc: Yinghai Lu Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 4 +--- include/linux/kernel_stat.h | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index bf31b03fc275..b80c620565bf 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -52,9 +52,7 @@ static int show_stat(struct seq_file *p, void *v) guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); guest_nice = cputime64_add(guest_nice, kstat_cpu(i).cpustat.guest_nice); - for_each_irq_nr(j) { - sum += kstat_irqs_cpu(j, i); - } + sum += kstat_cpu_irqs_sum(i); sum += arch_irq_stat_cpu(i); for (j = 0; j < NR_SOFTIRQS; j++) { diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index c059044bc6dc..8b9b89085530 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -33,6 +33,7 @@ struct kernel_stat { #ifndef CONFIG_GENERIC_HARDIRQS unsigned int irqs[NR_IRQS]; #endif + unsigned long irqs_sum; unsigned int softirqs[NR_SOFTIRQS]; }; @@ -54,6 +55,7 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) { kstat_this_cpu.irqs[irq]++; + kstat_this_cpu.irqs_sum++; } static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) @@ -65,8 +67,9 @@ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) -#define kstat_incr_irqs_this_cpu(irqno, DESC) \ - ((DESC)->kstat_irqs[smp_processor_id()]++) +#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\ + ((DESC)->kstat_irqs[smp_processor_id()]++);\ + kstat_this_cpu.irqs_sum++; } while (0) #endif @@ -94,6 +97,13 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } +/* + * Number of interrupts per cpu, since bootup + */ +static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) +{ + return kstat_cpu(cpu).irqs_sum; +} /* * Lock/unlock the current runqueue - to extract task statistics: From 478735e38887077ac77a9756121b6ce0cb956e2f Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:34:15 -0700 Subject: [PATCH 086/139] /proc/stat: fix scalability of irq sum of all cpu In /proc/stat, the number of per-IRQ event is shown by making a sum each irq's events on all cpus. But we can make use of kstat_irqs(). kstat_irqs() do the same calculation, If !CONFIG_GENERIC_HARDIRQ, it's not a big cost. (Both of the number of cpus and irqs are small.) If a system is very big and CONFIG_GENERIC_HARDIRQ, it does for_each_irq() for_each_cpu() - look up a radix tree - read desc->irq_stat[cpu] This seems not efficient. This patch adds kstat_irqs() for CONFIG_GENRIC_HARDIRQ and change the calculation as for_each_irq() look up radix tree for_each_cpu() - read desc->irq_stat[cpu] This reduces cost. A test on (4096cpusp, 256 nodes, 4592 irqs) host (by Jack Steiner) %time cat /proc/stat > /dev/null Before Patch: 2.459 sec After Patch : .561 sec [akpm@linux-foundation.org: unexport kstat_irqs, coding-style tweaks] [akpm@linux-foundation.org: fix unused variable 'per_irq_sum'] Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Jack Steiner Acked-by: Jack Steiner Cc: Yinghai Lu Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 10 ++-------- include/linux/kernel_stat.h | 4 ++++ kernel/irq/irqdesc.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b80c620565bf..e15a19c93bae 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v) u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; - unsigned int per_irq_sum; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -108,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for_each_irq_nr(j) { - per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); - - seq_printf(p, " %u", per_irq_sum); - } + for_each_irq_nr(j) + seq_printf(p, " %u", kstat_irqs(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8b9b89085530..ad54c846911b 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -86,6 +86,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ +#ifndef CONFIG_GENERIC_HARDIRQS static inline unsigned int kstat_irqs(unsigned int irq) { unsigned int sum = 0; @@ -96,6 +97,9 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } +#else +extern unsigned int kstat_irqs(unsigned int irq); +#endif /* * Number of interrupts per cpu, since bootup diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9d917ff72675..9988d03797f5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) struct irq_desc *desc = irq_to_desc(irq); return desc ? desc->kstat_irqs[cpu] : 0; } + +#ifdef CONFIG_GENERIC_HARDIRQS +unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + int cpu; + int sum = 0; + + if (!desc) + return 0; + for_each_possible_cpu(cpu) + sum += desc->kstat_irqs[cpu]; + return sum; +} +#endif /* CONFIG_GENERIC_HARDIRQS */ From 98391cf4dcf893e9e74e1c14189851dbc9c5ad0d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 27 Oct 2010 15:34:16 -0700 Subject: [PATCH 087/139] exec: don't turn PF_KTHREAD off when a target command was not found Presently do_execve() turns PF_KTHREAD off before search_binary_handler(). THis has a theorical risk of PF_KTHREAD getting lost. We don't have to turn PF_KTHREAD off in the ENOEXEC case. This patch moves this flag modification to after the finding of the executable file. This is only a theorical issue because kthreads do not call do_execve() directly. But fixing would be better. Signed-off-by: KOSAKI Motohiro Acked-by: Roland McGrath Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index ca01d2d0a6d4..99d33a1371e9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1009,7 +1009,7 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ - current->flags &= ~PF_RANDOMIZE; + current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1412,7 +1412,6 @@ int do_execve(const char * filename, if (retval < 0) goto out; - current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); if (retval < 0) goto out; From b795218075a1e1183169abb66a90dcdcf30367f9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 27 Oct 2010 15:34:16 -0700 Subject: [PATCH 088/139] ipc/shm.c: add RSS and swap size information to /proc/sysvipc/shm The kernel currently provides no functionality to analyze the RSS and swap space usage of each individual sysvipc shared memory segment. This patch adds this info for each existing shm segment by extending the output of /proc/sysvipc/shm by two columns for RSS and swap. Since shmctl(SHM_INFO) already provides a similiar calculation (it currently sums up all RSS/swap info for all segments), I did split out a static function which is now used by the /proc/sysvipc/shm output and shmctl(SHM_INFO). SAP products (esp. the SAP Netweaver ABAP Kernel) uses lots of big shared memory segments (we often have Linux systems with >= 16GB shm usage). Sometimes we get customer reports about "slow" system responses and while looking into their configurations we often find massive swapping activity on the system. With this patch it's now easy to see from the command line if and which shm segments gets swapped out (and how much) and can more easily give recommendations for system tuning. Without the patch it's currently not possible to do such shm analysis at all. Also... Add some spaces in front of the "size" field for 64bit kernels to get the columns correct if you cat the contents of the file. In sysvipc_shm_proc_show() the kernel prints the size value in "SPEC_SIZE" format, which is defined like this: #if BITS_PER_LONG <= 32 #define SIZE_SPEC "%10lu" #else #define SIZE_SPEC "%21lu" #endif So, if the header is not adjusted, the columns are not correctly aligned. I actually tested this on 32- and 64-bit and it seems correct now. Signed-off-by: Helge Deller Cc: Manfred Spraul Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 63 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index 7bc46a9fe1f8..fd658a1c2b88 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -108,7 +108,11 @@ void __init shm_init (void) { shm_init_ns(&init_ipc_ns); ipc_init_proc_interface("sysvipc/shm", - " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n", +#if BITS_PER_LONG <= 32 + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", +#else + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", +#endif IPC_SHM_IDS, sysvipc_shm_proc_show); } @@ -543,6 +547,34 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf } } +/* + * Calculate and add used RSS and swap pages of a shm. + * Called with shm_ids.rw_mutex held as a reader + */ +static void shm_add_rss_swap(struct shmid_kernel *shp, + unsigned long *rss_add, unsigned long *swp_add) +{ + struct inode *inode; + + inode = shp->shm_file->f_path.dentry->d_inode; + + if (is_file_hugepages(shp->shm_file)) { + struct address_space *mapping = inode->i_mapping; + struct hstate *h = hstate_file(shp->shm_file); + *rss_add += pages_per_huge_page(h) * mapping->nrpages; + } else { +#ifdef CONFIG_SHMEM + struct shmem_inode_info *info = SHMEM_I(inode); + spin_lock(&info->lock); + *rss_add += inode->i_mapping->nrpages; + *swp_add += info->swapped; + spin_unlock(&info->lock); +#else + *rss_add += inode->i_mapping->nrpages; +#endif + } +} + /* * Called with shm_ids.rw_mutex held as a reader */ @@ -560,30 +592,13 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, for (total = 0, next_id = 0; total < in_use; next_id++) { struct kern_ipc_perm *ipc; struct shmid_kernel *shp; - struct inode *inode; ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); if (ipc == NULL) continue; shp = container_of(ipc, struct shmid_kernel, shm_perm); - inode = shp->shm_file->f_path.dentry->d_inode; - - if (is_file_hugepages(shp->shm_file)) { - struct address_space *mapping = inode->i_mapping; - struct hstate *h = hstate_file(shp->shm_file); - *rss += pages_per_huge_page(h) * mapping->nrpages; - } else { -#ifdef CONFIG_SHMEM - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - *rss += inode->i_mapping->nrpages; - *swp += info->swapped; - spin_unlock(&info->lock); -#else - *rss += inode->i_mapping->nrpages; -#endif - } + shm_add_rss_swap(shp, rss, swp); total++; } @@ -1072,6 +1087,9 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { struct shmid_kernel *shp = it; + unsigned long rss = 0, swp = 0; + + shm_add_rss_swap(shp, &rss, &swp); #if BITS_PER_LONG <= 32 #define SIZE_SPEC "%10lu" @@ -1081,7 +1099,8 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it) return seq_printf(s, "%10d %10d %4o " SIZE_SPEC " %5u %5u " - "%5lu %5u %5u %5u %5u %10lu %10lu %10lu\n", + "%5lu %5u %5u %5u %5u %10lu %10lu %10lu " + SIZE_SPEC " " SIZE_SPEC "\n", shp->shm_perm.key, shp->shm_perm.id, shp->shm_perm.mode, @@ -1095,6 +1114,8 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it) shp->shm_perm.cgid, shp->shm_atim, shp->shm_dtim, - shp->shm_ctim); + shp->shm_ctim, + rss * PAGE_SIZE, + swp * PAGE_SIZE); } #endif From 03145beb455cf5c20a761e8451e30b8a74ba58d9 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Wed, 27 Oct 2010 15:34:17 -0700 Subject: [PATCH 089/139] ipc: initialize structure memory to zero for compat functions This takes care of leaking uninitialized kernel stack memory to userspace from non-zeroed fields in structs in compat ipc functions. Signed-off-by: Dan Rosenberg Cc: Manfred Spraul Cc: Arnd Bergmann Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/compat.c | 6 ++++++ ipc/compat_mq.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/ipc/compat.c b/ipc/compat.c index 9dc2c7d3c9e6..845a28738d3a 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -241,6 +241,8 @@ long compat_sys_semctl(int first, int second, int third, void __user *uptr) struct semid64_ds __user *up64; int version = compat_ipc_parse_version(&third); + memset(&s64, 0, sizeof(s64)); + if (!uptr) return -EINVAL; if (get_user(pad, (u32 __user *) uptr)) @@ -421,6 +423,8 @@ long compat_sys_msgctl(int first, int second, void __user *uptr) int version = compat_ipc_parse_version(&second); void __user *p; + memset(&m64, 0, sizeof(m64)); + switch (second & (~IPC_64)) { case IPC_INFO: case IPC_RMID: @@ -594,6 +598,8 @@ long compat_sys_shmctl(int first, int second, void __user *uptr) int err, err2; int version = compat_ipc_parse_version(&second); + memset(&s64, 0, sizeof(s64)); + switch (second & (~IPC_64)) { case IPC_RMID: case SHM_LOCK: diff --git a/ipc/compat_mq.c b/ipc/compat_mq.c index d8d1e9ff4e88..380ea4fe08e7 100644 --- a/ipc/compat_mq.c +++ b/ipc/compat_mq.c @@ -53,6 +53,9 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name, void __user *p = NULL; if (u_attr && oflag & O_CREAT) { struct mq_attr attr; + + memset(&attr, 0, sizeof(attr)); + p = compat_alloc_user_space(sizeof(attr)); if (get_compat_mq_attr(&attr, u_attr) || copy_to_user(p, &attr, sizeof(attr))) @@ -127,6 +130,8 @@ asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, struct mq_attr __user *p = compat_alloc_user_space(2 * sizeof(*p)); long ret; + memset(&mqstat, 0, sizeof(mqstat)); + if (u_mqstat) { if (get_compat_mq_attr(&mqstat, u_mqstat) || copy_to_user(p, &mqstat, sizeof(mqstat))) From 60ee6d5faf5f7920ba88b82c072864596f5b88af Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 27 Oct 2010 15:34:18 -0700 Subject: [PATCH 090/139] ipmi: fix __init and __exit attribute locations __init and __exit belong after the return type on functions, not before. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_devintf.c | 4 ++-- drivers/char/ipmi/ipmi_msghandler.c | 4 ++-- drivers/char/ipmi/ipmi_si_intf.c | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index 1fc8876af1f5..2aa3977aae5e 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -916,7 +916,7 @@ static struct ipmi_smi_watcher smi_watcher = .smi_gone = ipmi_smi_gone, }; -static __init int init_ipmi_devintf(void) +static int __init init_ipmi_devintf(void) { int rv; @@ -954,7 +954,7 @@ static __init int init_ipmi_devintf(void) } module_init(init_ipmi_devintf); -static __exit void cleanup_ipmi(void) +static void __exit cleanup_ipmi(void) { struct ipmi_reg_list *entry, *entry2; mutex_lock(®_list_mutex); diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 4f3f8c9ec262..2fe72f8edf44 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4442,13 +4442,13 @@ static int ipmi_init_msghandler(void) return 0; } -static __init int ipmi_init_msghandler_mod(void) +static int __init ipmi_init_msghandler_mod(void) { ipmi_init_msghandler(); return 0; } -static __exit void cleanup_ipmi(void) +static void __exit cleanup_ipmi(void) { int count; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index b293d57d30a7..035da9e64a17 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1846,7 +1846,7 @@ static int hotmod_handler(const char *val, struct kernel_param *kp) return rv; } -static __devinit void hardcode_find_bmc(void) +static void __devinit hardcode_find_bmc(void) { int i; struct smi_info *info; @@ -2029,7 +2029,7 @@ struct SPMITable { s8 spmi_id[1]; /* A '\0' terminated array starts here. */ }; -static __devinit int try_init_spmi(struct SPMITable *spmi) +static int __devinit try_init_spmi(struct SPMITable *spmi) { struct smi_info *info; @@ -2112,7 +2112,7 @@ static __devinit int try_init_spmi(struct SPMITable *spmi) return 0; } -static __devinit void spmi_find_bmc(void) +static void __devinit spmi_find_bmc(void) { acpi_status status; struct SPMITable *spmi; @@ -2325,7 +2325,7 @@ static int __devinit decode_dmi(const struct dmi_header *dm, return 0; } -static __devinit void try_init_dmi(struct dmi_ipmi_data *ipmi_data) +static void __devinit try_init_dmi(struct dmi_ipmi_data *ipmi_data) { struct smi_info *info; @@ -3012,7 +3012,7 @@ static __devinitdata struct ipmi_default_vals { .port = 0 } }; -static __devinit void default_find_bmc(void) +static void __devinit default_find_bmc(void) { struct smi_info *info; int i; @@ -3312,7 +3312,7 @@ static int try_smi_init(struct smi_info *new_smi) return rv; } -static __devinit int init_ipmi_si(void) +static int __devinit init_ipmi_si(void) { int i; char *str; @@ -3525,7 +3525,7 @@ static void cleanup_one_si(struct smi_info *to_clean) kfree(to_clean); } -static __exit void cleanup_ipmi_si(void) +static void __exit cleanup_ipmi_si(void) { struct smi_info *e, *tmp_e; From 713efa9a64fac01abc296228c5ca4f507217a8c9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Oct 2010 15:34:18 -0700 Subject: [PATCH 091/139] drivers/char/rocket.c: release_region or error path There was a release_region() missing on the error path. Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/rocket.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 7c79d243acc9..86308830ac42 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -2345,7 +2345,7 @@ static int __init rp_init(void) ret = tty_register_driver(rocket_driver); if (ret < 0) { printk(KERN_ERR "Couldn't install tty RocketPort driver\n"); - goto err_tty; + goto err_controller; } #ifdef ROCKET_DEBUG_OPEN @@ -2380,6 +2380,9 @@ static int __init rp_init(void) return 0; err_ttyu: tty_unregister_driver(rocket_driver); +err_controller: + if (controller) + release_region(controller, 4); err_tty: put_tty_driver(rocket_driver); err: From 656e17ea9d997db1bb37d90b7a447eb511af5a63 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Oct 2010 15:34:19 -0700 Subject: [PATCH 092/139] drivers/char/hvc_console.c: remove unneeded __set_current_state(TASK_RUNNING) This doesn't do anything. Cc: Timur Tabi Cc: Benjamin Herrenschmidt Cc: Amit Shah Cc: Greg Kroah-Hartman Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/hvc_console.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 3afd62e856eb..e9cba13ee800 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -713,7 +713,6 @@ static int khvcd(void *unused) struct hvc_struct *hp; set_freezable(); - __set_current_state(TASK_RUNNING); do { poll_mask = 0; hvc_kicked = 0; From dcade5ed16cce572e375bf4e63dd2150c351bf49 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 27 Oct 2010 15:34:20 -0700 Subject: [PATCH 093/139] SGI Altix IA64 mmtimer: eliminate long interval timer holdoffs This patch for SGI Altix/IA64 eliminates interval long timer holdoffs in cases where we don't start an interval timer before the expiration time. This sometimes happens when a number of interval timers on the same shub with the same interval run simultaneously. Signed-off-by: Dimitri Sivanich Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/mmtimer.c | 60 ++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index c070b53984e4..e6d75627c6c8 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -176,9 +176,9 @@ static void mmtimer_setup_int_2(int cpu, u64 expires) * in order to insure that the setup succeeds in a deterministic time frame. * It will check if the interrupt setup succeeded. */ -static int mmtimer_setup(int cpu, int comparator, unsigned long expires) +static int mmtimer_setup(int cpu, int comparator, unsigned long expires, + u64 *set_completion_time) { - switch (comparator) { case 0: mmtimer_setup_int_0(cpu, expires); @@ -191,7 +191,8 @@ static int mmtimer_setup(int cpu, int comparator, unsigned long expires) break; } /* We might've missed our expiration time */ - if (rtc_time() <= expires) + *set_completion_time = rtc_time(); + if (*set_completion_time <= expires) return 1; /* @@ -227,6 +228,8 @@ static int mmtimer_disable_int(long nasid, int comparator) #define TIMER_OFF 0xbadcabLL /* Timer is not setup */ #define TIMER_SET 0 /* Comparator is set for this timer */ +#define MMTIMER_INTERVAL_RETRY_INCREMENT_DEFAULT 40 + /* There is one of these for each timer */ struct mmtimer { struct rb_node list; @@ -242,6 +245,11 @@ struct mmtimer_node { }; static struct mmtimer_node *timers; +static unsigned mmtimer_interval_retry_increment = + MMTIMER_INTERVAL_RETRY_INCREMENT_DEFAULT; +module_param(mmtimer_interval_retry_increment, uint, 0644); +MODULE_PARM_DESC(mmtimer_interval_retry_increment, + "RTC ticks to add to expiration on interval retry (default 40)"); /* * Add a new mmtimer struct to the node's mmtimer list. @@ -289,7 +297,8 @@ static void mmtimer_set_next_timer(int nodeid) struct mmtimer_node *n = &timers[nodeid]; struct mmtimer *x; struct k_itimer *t; - int o; + u64 expires, exp, set_completion_time; + int i; restart: if (n->next == NULL) @@ -300,7 +309,8 @@ restart: if (!t->it.mmtimer.incr) { /* Not an interval timer */ if (!mmtimer_setup(x->cpu, COMPARATOR, - t->it.mmtimer.expires)) { + t->it.mmtimer.expires, + &set_completion_time)) { /* Late setup, fire now */ tasklet_schedule(&n->tasklet); } @@ -308,14 +318,23 @@ restart: } /* Interval timer */ - o = 0; - while (!mmtimer_setup(x->cpu, COMPARATOR, t->it.mmtimer.expires)) { - unsigned long e, e1; - struct rb_node *next; - t->it.mmtimer.expires += t->it.mmtimer.incr << o; - t->it_overrun += 1 << o; - o++; - if (o > 20) { + i = 0; + expires = exp = t->it.mmtimer.expires; + while (!mmtimer_setup(x->cpu, COMPARATOR, expires, + &set_completion_time)) { + int to; + + i++; + expires = set_completion_time + + mmtimer_interval_retry_increment + (1 << i); + /* Calculate overruns as we go. */ + to = ((u64)(expires - exp) / t->it.mmtimer.incr); + if (to) { + t->it_overrun += to; + t->it.mmtimer.expires += t->it.mmtimer.incr * to; + exp = t->it.mmtimer.expires; + } + if (i > 20) { printk(KERN_ALERT "mmtimer: cannot reschedule timer\n"); t->it.mmtimer.clock = TIMER_OFF; n->next = rb_next(&x->list); @@ -323,21 +342,6 @@ restart: kfree(x); goto restart; } - - e = t->it.mmtimer.expires; - next = rb_next(&x->list); - - if (next == NULL) - continue; - - e1 = rb_entry(next, struct mmtimer, list)-> - timer->it.mmtimer.expires; - if (e > e1) { - n->next = next; - rb_erase(&x->list, &n->timer_head); - mmtimer_add_list(x); - goto restart; - } } } From ffd7d6baa65e6161cfd996a59d55c48571c2a5f3 Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Wed, 27 Oct 2010 15:34:20 -0700 Subject: [PATCH 094/139] synclink_gt: fix per device locking Fix a long standing bug with per device locking. Each device has an associated spinlock for synchronizing access to hardware and state information with the ISR. A single hardware card has one or more devices. Bug: Non ISR code correctly acquires and releases the per device lock. ISR incorrectly always acquires and releases the lock of the first device on the card. The decoupled and list based nature of the ISR and deferred processing interaction allowed this to work in normal operation. Exceptional events like an application forcing hardware shutdown, reset, or reconfiguration while active can trigger the bug. Fixed ISR to acquire and release the per device lock. One exception is manipulation of the GPIO card resource which is global and effectively owned by the first device of the card. Non-ISR access to GPIO resource is changed to use lock of first device on card. Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/synclink_gt.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 1746d91205f7..11999784383e 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -2357,26 +2357,27 @@ static irqreturn_t slgt_interrupt(int dummy, void *dev_id) DBGISR(("slgt_interrupt irq=%d entry\n", info->irq_level)); - spin_lock(&info->lock); - while((gsr = rd_reg32(info, GSR) & 0xffffff00)) { DBGISR(("%s gsr=%08x\n", info->device_name, gsr)); info->irq_occurred = true; for(i=0; i < info->port_count ; i++) { if (info->port_array[i] == NULL) continue; + spin_lock(&info->port_array[i]->lock); if (gsr & (BIT8 << i)) isr_serial(info->port_array[i]); if (gsr & (BIT16 << (i*2))) isr_rdma(info->port_array[i]); if (gsr & (BIT17 << (i*2))) isr_tdma(info->port_array[i]); + spin_unlock(&info->port_array[i]->lock); } } if (info->gpio_present) { unsigned int state; unsigned int changed; + spin_lock(&info->lock); while ((changed = rd_reg32(info, IOSR)) != 0) { DBGISR(("%s iosr=%08x\n", info->device_name, changed)); /* read latched state of GPIO signals */ @@ -2388,22 +2389,24 @@ static irqreturn_t slgt_interrupt(int dummy, void *dev_id) isr_gpio(info->port_array[i], changed, state); } } + spin_unlock(&info->lock); } for(i=0; i < info->port_count ; i++) { struct slgt_info *port = info->port_array[i]; - - if (port && (port->port.count || port->netcount) && + if (port == NULL) + continue; + spin_lock(&port->lock); + if ((port->port.count || port->netcount) && port->pending_bh && !port->bh_running && !port->bh_requested) { DBGISR(("%s bh queued\n", port->device_name)); schedule_work(&port->task); port->bh_requested = true; } + spin_unlock(&port->lock); } - spin_unlock(&info->lock); - DBGISR(("slgt_interrupt irq=%d exit\n", info->irq_level)); return IRQ_HANDLED; } @@ -2906,7 +2909,7 @@ static int set_gpio(struct slgt_info *info, struct gpio_desc __user *user_gpio) info->device_name, gpio.state, gpio.smask, gpio.dir, gpio.dmask)); - spin_lock_irqsave(&info->lock,flags); + spin_lock_irqsave(&info->port_array[0]->lock, flags); if (gpio.dmask) { data = rd_reg32(info, IODR); data |= gpio.dmask & gpio.dir; @@ -2919,7 +2922,7 @@ static int set_gpio(struct slgt_info *info, struct gpio_desc __user *user_gpio) data &= ~(gpio.smask & ~gpio.state); wr_reg32(info, IOVR, data); } - spin_unlock_irqrestore(&info->lock,flags); + spin_unlock_irqrestore(&info->port_array[0]->lock, flags); return 0; } @@ -3020,7 +3023,7 @@ static int wait_gpio(struct slgt_info *info, struct gpio_desc __user *user_gpio) return -EINVAL; init_cond_wait(&wait, gpio.smask); - spin_lock_irqsave(&info->lock, flags); + spin_lock_irqsave(&info->port_array[0]->lock, flags); /* enable interrupts for watched pins */ wr_reg32(info, IOER, rd_reg32(info, IOER) | gpio.smask); /* get current pin states */ @@ -3032,20 +3035,20 @@ static int wait_gpio(struct slgt_info *info, struct gpio_desc __user *user_gpio) } else { /* wait for target state */ add_cond_wait(&info->gpio_wait_q, &wait); - spin_unlock_irqrestore(&info->lock, flags); + spin_unlock_irqrestore(&info->port_array[0]->lock, flags); schedule(); if (signal_pending(current)) rc = -ERESTARTSYS; else gpio.state = wait.data; - spin_lock_irqsave(&info->lock, flags); + spin_lock_irqsave(&info->port_array[0]->lock, flags); remove_cond_wait(&info->gpio_wait_q, &wait); } /* disable all GPIO interrupts if no waiting processes */ if (info->gpio_wait_q == NULL) wr_reg32(info, IOER, 0); - spin_unlock_irqrestore(&info->lock,flags); + spin_unlock_irqrestore(&info->port_array[0]->lock, flags); if ((rc == 0) && copy_to_user(user_gpio, &gpio, sizeof(gpio))) rc = -EFAULT; @@ -3578,7 +3581,6 @@ static void device_init(int adapter_num, struct pci_dev *pdev) /* copy resource information from first port to others */ for (i = 1; i < port_count; ++i) { - port_array[i]->lock = port_array[0]->lock; port_array[i]->irq_level = port_array[0]->irq_level; port_array[i]->reg_addr = port_array[0]->reg_addr; alloc_dma_bufs(port_array[i]); From 19714a8af8fe8618a9beace1f7a3bb10d55d5e2f Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Wed, 27 Oct 2010 15:34:21 -0700 Subject: [PATCH 095/139] drivers/char/applicom.c: fix information leak to userland Structure st_loc is copied to userland with some fields unitialized. It leads to leaking of stack memory. Signed-off-by: Vasiliy Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/applicom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index e7ba774beda6..25373df1dcf8 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -566,6 +566,7 @@ static ssize_t ac_read (struct file *filp, char __user *buf, size_t count, loff_ struct mailbox mailbox; /* Got a packet for us */ + memset(&st_loc, 0, sizeof(st_loc)); ret = do_ac_read(i, buf, &st_loc, &mailbox); spin_unlock_irqrestore(&apbs[i].mutex, flags); set_current_state(TASK_RUNNING); From b9b1134260e036fb75c468514569864dd6722f3e Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Wed, 27 Oct 2010 15:34:21 -0700 Subject: [PATCH 096/139] drivers/char/ppdev.c: fix information leak to userland Structure par_timeout is copied to userland with some padding fields unitialized. Field tv_usec has type __kernel_suseconds_t, it differs from tv_sec's type on some architectures. It leads to leaking of stack memory. Signed-off-by: Vasiliy Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ppdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 723152d978a9..f176dbaeb15a 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -613,6 +613,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PPGETTIME: to_jiffies = pp->pdev->timeout; + memset(&par_timeout, 0, sizeof(par_timeout)); par_timeout.tv_sec = to_jiffies / HZ; par_timeout.tv_usec = (to_jiffies % (long)HZ) * (1000000/HZ); if (copy_to_user (argp, &par_timeout, sizeof(struct timeval))) From ed77ed6112f2d4b650f4be7dbaf14e06e1d393a5 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Wed, 27 Oct 2010 15:34:22 -0700 Subject: [PATCH 097/139] drivers/char/synclink_gt.c: fix information leak to userland Structures tmp_params and new_line are copied to userland with some padding fields unitialized. It leads to leaking of stack memory. Signed-off-by: Vasiliy Kulikov Acked-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/synclink_gt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 11999784383e..9f7fc71474b4 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -1132,6 +1132,7 @@ static long get_params32(struct slgt_info *info, struct MGSL_PARAMS32 __user *us struct MGSL_PARAMS32 tmp_params; DBGINFO(("%s get_params32\n", info->device_name)); + memset(&tmp_params, 0, sizeof(tmp_params)); tmp_params.mode = (compat_ulong_t)info->params.mode; tmp_params.loopback = info->params.loopback; tmp_params.flags = info->params.flags; @@ -1617,6 +1618,8 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (cmd != SIOCWANDEV) return hdlc_ioctl(dev, ifr, cmd); + memset(&new_line, 0, sizeof(new_line)); + switch(ifr->ifr_settings.type) { case IF_GET_IFACE: /* return current sync_serial_settings */ From 9807224f1dce5fb746ee33fb67ea2e38dafe3e9c Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Wed, 27 Oct 2010 15:34:22 -0700 Subject: [PATCH 098/139] drivers/char/synclink_gt.c: add extended sync feature Add support for extended byte synchronous mode feature of hardware. Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/synclink_gt.c | 111 ++++++++++++++++++++++++++++++++++++- include/linux/synclink.h | 5 ++ 2 files changed, 113 insertions(+), 3 deletions(-) diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 9f7fc71474b4..d01fffeac951 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -301,6 +301,8 @@ struct slgt_info { unsigned int rx_pio; unsigned int if_mode; unsigned int base_clock; + unsigned int xsync; + unsigned int xctrl; /* device status */ @@ -405,6 +407,8 @@ static MGSL_PARAMS default_params = { #define TDCSR 0x94 /* tx DMA control/status */ #define RDDAR 0x98 /* rx DMA descriptor address */ #define TDDAR 0x9c /* tx DMA descriptor address */ +#define XSR 0x40 /* extended sync pattern */ +#define XCR 0x44 /* extended control */ #define RXIDLE BIT14 #define RXBREAK BIT14 @@ -517,6 +521,10 @@ static int set_interface(struct slgt_info *info, int if_mode); static int set_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); static int get_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); static int wait_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); +static int get_xsync(struct slgt_info *info, int __user *if_mode); +static int set_xsync(struct slgt_info *info, int if_mode); +static int get_xctrl(struct slgt_info *info, int __user *if_mode); +static int set_xctrl(struct slgt_info *info, int if_mode); /* * driver functions @@ -1056,6 +1064,14 @@ static int ioctl(struct tty_struct *tty, struct file *file, return get_gpio(info, argp); case MGSL_IOCWAITGPIO: return wait_gpio(info, argp); + case MGSL_IOCGXSYNC: + return get_xsync(info, argp); + case MGSL_IOCSXSYNC: + return set_xsync(info, (int)arg); + case MGSL_IOCGXCTRL: + return get_xctrl(info, argp); + case MGSL_IOCSXCTRL: + return set_xctrl(info, (int)arg); } mutex_lock(&info->port.mutex); switch (cmd) { @@ -1213,12 +1229,16 @@ static long slgt_compat_ioctl(struct tty_struct *tty, struct file *file, case MGSL_IOCSGPIO: case MGSL_IOCGGPIO: case MGSL_IOCWAITGPIO: + case MGSL_IOCGXSYNC: + case MGSL_IOCGXCTRL: case MGSL_IOCSTXIDLE: case MGSL_IOCTXENABLE: case MGSL_IOCRXENABLE: case MGSL_IOCTXABORT: case TIOCMIWAIT: case MGSL_IOCSIF: + case MGSL_IOCSXSYNC: + case MGSL_IOCSXCTRL: rc = ioctl(tty, file, cmd, arg); break; } @@ -1961,6 +1981,7 @@ static void bh_handler(struct work_struct *work) case MGSL_MODE_RAW: case MGSL_MODE_MONOSYNC: case MGSL_MODE_BISYNC: + case MGSL_MODE_XSYNC: while(rx_get_buf(info)); break; } @@ -2889,6 +2910,69 @@ static int set_interface(struct slgt_info *info, int if_mode) return 0; } +static int get_xsync(struct slgt_info *info, int __user *xsync) +{ + DBGINFO(("%s get_xsync=%x\n", info->device_name, info->xsync)); + if (put_user(info->xsync, xsync)) + return -EFAULT; + return 0; +} + +/* + * set extended sync pattern (1 to 4 bytes) for extended sync mode + * + * sync pattern is contained in least significant bytes of value + * most significant byte of sync pattern is oldest (1st sent/detected) + */ +static int set_xsync(struct slgt_info *info, int xsync) +{ + unsigned long flags; + + DBGINFO(("%s set_xsync=%x)\n", info->device_name, xsync)); + spin_lock_irqsave(&info->lock, flags); + info->xsync = xsync; + wr_reg32(info, XSR, xsync); + spin_unlock_irqrestore(&info->lock, flags); + return 0; +} + +static int get_xctrl(struct slgt_info *info, int __user *xctrl) +{ + DBGINFO(("%s get_xctrl=%x\n", info->device_name, info->xctrl)); + if (put_user(info->xctrl, xctrl)) + return -EFAULT; + return 0; +} + +/* + * set extended control options + * + * xctrl[31:19] reserved, must be zero + * xctrl[18:17] extended sync pattern length in bytes + * 00 = 1 byte in xsr[7:0] + * 01 = 2 bytes in xsr[15:0] + * 10 = 3 bytes in xsr[23:0] + * 11 = 4 bytes in xsr[31:0] + * xctrl[16] 1 = enable terminal count, 0=disabled + * xctrl[15:0] receive terminal count for fixed length packets + * value is count minus one (0 = 1 byte packet) + * when terminal count is reached, receiver + * automatically returns to hunt mode and receive + * FIFO contents are flushed to DMA buffers with + * end of frame (EOF) status + */ +static int set_xctrl(struct slgt_info *info, int xctrl) +{ + unsigned long flags; + + DBGINFO(("%s set_xctrl=%x)\n", info->device_name, xctrl)); + spin_lock_irqsave(&info->lock, flags); + info->xctrl = xctrl; + wr_reg32(info, XCR, xctrl); + spin_unlock_irqrestore(&info->lock, flags); + return 0; +} + /* * set general purpose IO pin state and direction * @@ -3768,7 +3852,9 @@ module_exit(slgt_exit); #define CALC_REGADDR() \ unsigned long reg_addr = ((unsigned long)info->reg_addr) + addr; \ if (addr >= 0x80) \ - reg_addr += (info->port_num) * 32; + reg_addr += (info->port_num) * 32; \ + else if (addr >= 0x40) \ + reg_addr += (info->port_num) * 16; static __u8 rd_reg8(struct slgt_info *info, unsigned int addr) { @@ -4187,7 +4273,13 @@ static void sync_mode(struct slgt_info *info) /* TCR (tx control) * - * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync + * 15..13 mode + * 000=HDLC/SDLC + * 001=raw bit synchronous + * 010=asynchronous/isochronous + * 011=monosync byte synchronous + * 100=bisync byte synchronous + * 101=xsync byte synchronous * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4202,6 +4294,9 @@ static void sync_mode(struct slgt_info *info) val = BIT2; switch(info->params.mode) { + case MGSL_MODE_XSYNC: + val |= BIT15 + BIT13; + break; case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; case MGSL_MODE_BISYNC: val |= BIT15; break; case MGSL_MODE_RAW: val |= BIT13; break; @@ -4256,7 +4351,13 @@ static void sync_mode(struct slgt_info *info) /* RCR (rx control) * - * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync + * 15..13 mode + * 000=HDLC/SDLC + * 001=raw bit synchronous + * 010=asynchronous/isochronous + * 011=monosync byte synchronous + * 100=bisync byte synchronous + * 101=xsync byte synchronous * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4268,6 +4369,9 @@ static void sync_mode(struct slgt_info *info) val = 0; switch(info->params.mode) { + case MGSL_MODE_XSYNC: + val |= BIT15 + BIT13; + break; case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; case MGSL_MODE_BISYNC: val |= BIT15; break; case MGSL_MODE_RAW: val |= BIT13; break; @@ -4684,6 +4788,7 @@ static bool rx_get_buf(struct slgt_info *info) switch(info->params.mode) { case MGSL_MODE_MONOSYNC: case MGSL_MODE_BISYNC: + case MGSL_MODE_XSYNC: /* ignore residue in byte synchronous modes */ if (desc_residue(info->rbufs[i])) count--; diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 0ff2779c44d0..2e7d81c4e5ad 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -126,6 +126,7 @@ #define MGSL_MODE_BISYNC 4 #define MGSL_MODE_RAW 6 #define MGSL_MODE_BASE_CLOCK 7 +#define MGSL_MODE_XSYNC 8 #define MGSL_BUS_TYPE_ISA 1 #define MGSL_BUS_TYPE_EISA 2 @@ -290,6 +291,10 @@ struct gpio_desc { #define MGSL_IOCSGPIO _IOW(MGSL_MAGIC_IOC,16,struct gpio_desc) #define MGSL_IOCGGPIO _IOR(MGSL_MAGIC_IOC,17,struct gpio_desc) #define MGSL_IOCWAITGPIO _IOWR(MGSL_MAGIC_IOC,18,struct gpio_desc) +#define MGSL_IOCSXSYNC _IO(MGSL_MAGIC_IOC, 19) +#define MGSL_IOCGXSYNC _IO(MGSL_MAGIC_IOC, 20) +#define MGSL_IOCSXCTRL _IO(MGSL_MAGIC_IOC, 21) +#define MGSL_IOCGXCTRL _IO(MGSL_MAGIC_IOC, 22) #ifdef __KERNEL__ /* provide 32 bit ioctl compatibility on 64 bit systems */ From 44cd4dffe28b18139fe76e2a7b39cd1fdb459619 Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Wed, 27 Oct 2010 15:34:23 -0700 Subject: [PATCH 099/139] drivers/char/ip2/Makefile: replace the use of -objs with -y Changed -objs to -y in Makefile. Signed-off-by: Tracey Dent Cc: Jiri Slaby Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ip2/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ip2/Makefile b/drivers/char/ip2/Makefile index bc397d92b499..7b78e0dfc5b0 100644 --- a/drivers/char/ip2/Makefile +++ b/drivers/char/ip2/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_COMPUTONE) += ip2.o -ip2-objs := ip2main.o +ip2-y := ip2main.o From 7fa51743dcf176bcdca43603b4257748a222fe9b Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Wed, 27 Oct 2010 15:34:23 -0700 Subject: [PATCH 100/139] drivers/char/ipmi/Makefile: replace the use of -objs with -y Changed -objs to -y in Makefile. Signed-off-by: Tracey Dent Cc: Jiri Slaby Cc: Michal Marek Cc: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile index eb8a1a8c188e..16a93648d54e 100644 --- a/drivers/char/ipmi/Makefile +++ b/drivers/char/ipmi/Makefile @@ -2,7 +2,7 @@ # Makefile for the ipmi drivers. # -ipmi_si-objs := ipmi_si_intf.o ipmi_kcs_sm.o ipmi_smic_sm.o ipmi_bt_sm.o +ipmi_si-y := ipmi_si_intf.o ipmi_kcs_sm.o ipmi_smic_sm.o ipmi_bt_sm.o obj-$(CONFIG_IPMI_HANDLER) += ipmi_msghandler.o obj-$(CONFIG_IPMI_DEVICE_INTERFACE) += ipmi_devintf.o From cb299ba8b5ef2239429484072fea394cd7581bd7 Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Wed, 27 Oct 2010 15:34:24 -0700 Subject: [PATCH 101/139] drivers/char/mwave/Makefile: clean up Changed -objs to -y in Makefile and use ccflags-y option. Signed-off-by: Tracey Dent Cc: Jiri Slaby Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/mwave/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/mwave/Makefile b/drivers/char/mwave/Makefile index 754c9e2058ed..26b4fce217b6 100644 --- a/drivers/char/mwave/Makefile +++ b/drivers/char/mwave/Makefile @@ -6,10 +6,10 @@ obj-$(CONFIG_MWAVE) += mwave.o -mwave-objs := mwavedd.o smapi.o tp3780i.o 3780i.o +mwave-y := mwavedd.o smapi.o tp3780i.o 3780i.o # To have the mwave driver disable other uarts if necessary # EXTRA_CFLAGS += -DMWAVE_FUTZ_WITH_OTHER_DEVICES # To compile in lots (~20 KiB) of run-time enablable printk()s for debugging: -EXTRA_CFLAGS += -DMW_TRACE +ccflags-y := -DMW_TRACE From a222945eb1568687eec47113027586da47e1ee94 Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Wed, 27 Oct 2010 15:34:25 -0700 Subject: [PATCH 102/139] drivers/char/pcmcia/ipwireless/Makefile: Makefile: replace the use of -objs with -y Changed -objs to -y in Makefile. Signed-off-by: Tracey Dent Cc: Jiri Slaby Cc: Michal Marek Acked-by: Jiri Kosina Acked-by: David Sterba Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/pcmcia/ipwireless/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/pcmcia/ipwireless/Makefile b/drivers/char/pcmcia/ipwireless/Makefile index b71eb593643d..db80873d7f20 100644 --- a/drivers/char/pcmcia/ipwireless/Makefile +++ b/drivers/char/pcmcia/ipwireless/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_IPWIRELESS) += ipwireless.o -ipwireless-objs := hardware.o main.o network.o tty.o +ipwireless-y := hardware.o main.o network.o tty.o From 5a780fc096397f9b62bc6dddced509f012a97e78 Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Wed, 27 Oct 2010 15:34:25 -0700 Subject: [PATCH 103/139] drivers/char/rio/Makefile: replace the use of -objs with -y Changed -objs to -y in Makefile. Signed-off-by: Tracey Dent Cc: Jiri Slaby Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/rio/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/rio/Makefile b/drivers/char/rio/Makefile index 2d1c5a7cba7d..1661875883fb 100644 --- a/drivers/char/rio/Makefile +++ b/drivers/char/rio/Makefile @@ -8,5 +8,5 @@ obj-$(CONFIG_RIO) += rio.o -rio-objs := rio_linux.o rioinit.o rioboot.o riocmd.o rioctrl.o riointr.o \ +rio-y := rio_linux.o rioinit.o rioboot.o riocmd.o rioctrl.o riointr.o \ rioparam.o rioroute.o riotable.o riotty.o From e89d67cfbcc1ee32339da9e816489f69742c3a6e Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Wed, 27 Oct 2010 15:34:26 -0700 Subject: [PATCH 104/139] drivers/char/mxser.c: fix compilation warning in mxser.c Both mxser_disable_must_enchance_mode() and mxser_get_must_hardware_id() called from function CheckIsMoxaMust(), when CONFIG_PCI=y. So mark both the functions under CONFIG_PCI. We were warned by the following warning. drivers/char/mxser.c:306: warning: `mxser_disable_must_enchance_mode' defined but not used drivers/char/mxser.c:391: warning: `mxser_get_must_hardware_id' defined but not used Signed-off-by: Rakib Mullick Cc: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/mxser.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 463df27494bd..dd9d75351cd6 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -303,6 +303,7 @@ static void mxser_enable_must_enchance_mode(unsigned long baseio) outb(oldlcr, baseio + UART_LCR); } +#ifdef CONFIG_PCI static void mxser_disable_must_enchance_mode(unsigned long baseio) { u8 oldlcr; @@ -317,6 +318,7 @@ static void mxser_disable_must_enchance_mode(unsigned long baseio) outb(efr, baseio + MOXA_MUST_EFR_REGISTER); outb(oldlcr, baseio + UART_LCR); } +#endif static void mxser_set_must_xon1_value(unsigned long baseio, u8 value) { @@ -388,6 +390,7 @@ static void mxser_set_must_enum_value(unsigned long baseio, u8 value) outb(oldlcr, baseio + UART_LCR); } +#ifdef CONFIG_PCI static void mxser_get_must_hardware_id(unsigned long baseio, u8 *pId) { u8 oldlcr; @@ -404,6 +407,7 @@ static void mxser_get_must_hardware_id(unsigned long baseio, u8 *pId) *pId = inb(baseio + MOXA_MUST_HWID_REGISTER); outb(oldlcr, baseio + UART_LCR); } +#endif static void SET_MOXA_MUST_NO_SOFTWARE_FLOW_CONTROL(unsigned long baseio) { From 2c70f022e2e1b1493e157dbc3796b1f70a3ff162 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:26 -0700 Subject: [PATCH 105/139] rapidio: fix RapidIO sysfs hierarchy This set of RapidIO patches extends support for standard error recovery mechanism and adds new IDT Gen2 sRIO switch devices - CPS-1848 and CPS-1616. Implementation of the standard error-stopped state recovery mechanism (as defined by the RapidIO specification) is required for the new switches. Version 2 of this set of patches addresses received comments and fixes an error notification setup issue found in the idt_gen2.c after the first version was released. This patch: Make RapidIO devices appear in /sys/devices/rapidio directory instead of top of /sys/devices directory. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-driver.c | 2 +- drivers/rapidio/rio-scan.c | 1 + include/linux/rio.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index 3222fa3c808c..0f4a53bdaa3c 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -192,7 +192,7 @@ static int rio_match_bus(struct device *dev, struct device_driver *drv) out:return 0; } -static struct device rio_bus = { +struct device rio_bus = { .init_name = "rapidio", }; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 8070e074c739..1123be8f4b18 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -478,6 +478,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, } rdev->dev.bus = &rio_bus_type; + rdev->dev.parent = &rio_bus; device_initialize(&rdev->dev); rdev->dev.release = rio_release_dev; diff --git a/include/linux/rio.h b/include/linux/rio.h index bd6eb0ed34a7..84c9f8c5fb23 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -67,6 +67,7 @@ #define RIO_PW_MSG_SIZE 64 extern struct bus_type rio_bus_type; +extern struct device rio_bus; extern struct list_head rio_devices; /* list of all devices */ struct rio_mport; From 93e2cbd24e71f5eedf6e49e075973fda9b2135e8 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:28 -0700 Subject: [PATCH 106/139] rapidio:powerpc/85xx: modify RIO port-write interrupt handler - Rearrange RIO port-write interrupt handling to perform message buffering as soon as possible. - Modify to disable port-write controller when clearing Transaction Error (TE) bit. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 68 ++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 412763672d23..ed2ec7154917 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -87,6 +87,9 @@ #define RIO_IPWSR_PWD 0x00000008 #define RIO_IPWSR_PWB 0x00000004 +#define RIO_EPWISR_PINT 0x80000000 +#define RIO_EPWISR_PW 0x00000001 + #define RIO_MSG_DESC_SIZE 32 #define RIO_MSG_BUFFER_SIZE 4096 #define RIO_MIN_TX_RING_SIZE 2 @@ -1082,19 +1085,13 @@ fsl_rio_port_write_handler(int irq, void *dev_instance) struct rio_priv *priv = port->priv; u32 epwisr, tmp; + epwisr = in_be32(priv->regs_win + RIO_EPWISR); + if (!(epwisr & RIO_EPWISR_PW)) + goto pw_done; + ipwmr = in_be32(&priv->msg_regs->pwmr); ipwsr = in_be32(&priv->msg_regs->pwsr); - epwisr = in_be32(priv->regs_win + RIO_EPWISR); - if (epwisr & 0x80000000) { - tmp = in_be32(priv->regs_win + RIO_LTLEDCSR); - pr_info("RIO_LTLEDCSR = 0x%x\n", tmp); - out_be32(priv->regs_win + RIO_LTLEDCSR, 0); - } - - if (!(epwisr & 0x00000001)) - return IRQ_HANDLED; - #ifdef DEBUG_PW pr_debug("PW Int->IPWMR: 0x%08x IPWSR: 0x%08x (", ipwmr, ipwsr); if (ipwsr & RIO_IPWSR_QF) @@ -1109,20 +1106,6 @@ fsl_rio_port_write_handler(int irq, void *dev_instance) pr_debug(" PWB"); pr_debug(" )\n"); #endif - out_be32(&priv->msg_regs->pwsr, - ipwsr & (RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD)); - - if ((ipwmr & RIO_IPWMR_EIE) && (ipwsr & RIO_IPWSR_TE)) { - priv->port_write_msg.err_count++; - pr_info("RIO: Port-Write Transaction Err (%d)\n", - priv->port_write_msg.err_count); - } - if (ipwsr & RIO_IPWSR_PWD) { - priv->port_write_msg.discard_count++; - pr_info("RIO: Port Discarded Port-Write Msg(s) (%d)\n", - priv->port_write_msg.discard_count); - } - /* Schedule deferred processing if PW was received */ if (ipwsr & RIO_IPWSR_QFI) { /* Save PW message (if there is room in FIFO), @@ -1134,16 +1117,43 @@ fsl_rio_port_write_handler(int irq, void *dev_instance) RIO_PW_MSG_SIZE); } else { priv->port_write_msg.discard_count++; - pr_info("RIO: ISR Discarded Port-Write Msg(s) (%d)\n", + pr_debug("RIO: ISR Discarded Port-Write Msg(s) (%d)\n", priv->port_write_msg.discard_count); } + /* Clear interrupt and issue Clear Queue command. This allows + * another port-write to be received. + */ + out_be32(&priv->msg_regs->pwsr, RIO_IPWSR_QFI); + out_be32(&priv->msg_regs->pwmr, ipwmr | RIO_IPWMR_CQ); + schedule_work(&priv->pw_work); } - /* Issue Clear Queue command. This allows another - * port-write to be received. - */ - out_be32(&priv->msg_regs->pwmr, ipwmr | RIO_IPWMR_CQ); + if ((ipwmr & RIO_IPWMR_EIE) && (ipwsr & RIO_IPWSR_TE)) { + priv->port_write_msg.err_count++; + pr_debug("RIO: Port-Write Transaction Err (%d)\n", + priv->port_write_msg.err_count); + /* Clear Transaction Error: port-write controller should be + * disabled when clearing this error + */ + out_be32(&priv->msg_regs->pwmr, ipwmr & ~RIO_IPWMR_PWE); + out_be32(&priv->msg_regs->pwsr, RIO_IPWSR_TE); + out_be32(&priv->msg_regs->pwmr, ipwmr); + } + + if (ipwsr & RIO_IPWSR_PWD) { + priv->port_write_msg.discard_count++; + pr_debug("RIO: Port Discarded Port-Write Msg(s) (%d)\n", + priv->port_write_msg.discard_count); + out_be32(&priv->msg_regs->pwsr, RIO_IPWSR_PWD); + } + +pw_done: + if (epwisr & RIO_EPWISR_PINT) { + tmp = in_be32(priv->regs_win + RIO_LTLEDCSR); + pr_debug("RIO_LTLEDCSR = 0x%x\n", tmp); + out_be32(priv->regs_win + RIO_LTLEDCSR, 0); + } return IRQ_HANDLED; } From ae05cbd5adef897d405ce8f90484c1239f79e086 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:29 -0700 Subject: [PATCH 107/139] rapidio: use stored ingress port number instead of register read The switch port information is obtained and stored during RIO device setup. Therefore repeated reads from Switch Port Information CAR may be removed. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 36 +++++++++--------------------------- include/linux/rio.h | 4 +++- include/linux/rio_regs.h | 2 ++ 3 files changed, 14 insertions(+), 28 deletions(-) diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 1123be8f4b18..d09c359844f3 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -420,6 +420,11 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, hopcount, RIO_EFB_ERR_MGMNT); } + if (rdev->pef & (RIO_PEF_SWITCH | RIO_PEF_MULTIPORT)) { + rio_mport_read_config_32(port, destid, hopcount, + RIO_SWP_INFO_CAR, &rdev->swpinfo); + } + rio_mport_read_config_32(port, destid, hopcount, RIO_SRC_OPS_CAR, &rdev->src_ops); rio_mport_read_config_32(port, destid, hopcount, RIO_DST_OPS_CAR, @@ -439,8 +444,6 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, /* If a PE has both switch and other functions, show it as a switch */ if (rio_is_switch(rdev)) { - rio_mport_read_config_32(port, destid, hopcount, - RIO_SWP_INFO_CAR, &rdev->swpinfo); rswitch = kzalloc(sizeof(struct rio_switch), GFP_KERNEL); if (!rswitch) goto cleanup; @@ -458,6 +461,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, rdid++) rswitch->route_table[rdid] = RIO_INVALID_ROUTE; rdev->rswitch = rswitch; + rswitch->rdev = rdev; dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id, rdev->rswitch->switchid); rio_switch_init(rdev, do_enum); @@ -718,25 +722,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_get_swpinfo_inport- Gets the ingress port number - * @mport: Master port to send transaction - * @destid: Destination ID associated with the switch - * @hopcount: Number of hops to the device - * - * Returns port number being used to access the switch device. - */ -static u8 -rio_get_swpinfo_inport(struct rio_mport *mport, u16 destid, u8 hopcount) -{ - u32 result; - - rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, - &result); - - return (u8) (result & 0xff); -} - /** * rio_get_swpinfo_tports- Gets total number of ports on the switch * @mport: Master port to send transaction @@ -834,8 +819,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rio_is_switch(rdev)) { next_switchid++; - sw_inport = rio_get_swpinfo_inport(port, - RIO_ANY_DESTID(port->sys_size), hopcount); + sw_inport = RIO_GET_PORT_NUM(rdev->swpinfo); rio_route_add_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE, port->host_deviceid, sw_inport, 0); rdev->rswitch->route_table[port->host_deviceid] = sw_inport; @@ -989,8 +973,7 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", rio_name(rdev), rdev->vid, rdev->did, num_ports); for (port_num = 0; port_num < num_ports; port_num++) { - if (rio_get_swpinfo_inport(port, destid, hopcount) == - port_num) + if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; if (rio_sport_is_active @@ -1109,8 +1092,7 @@ static void rio_update_route_tables(struct rio_mport *port) if (rswitch->destid == destid) continue; - sport = rio_get_swpinfo_inport(port, - rswitch->destid, rswitch->hopcount); + sport = RIO_GET_PORT_NUM(rswitch->rdev->swpinfo); if (rswitch->add_entry) { rio_route_add_entry(port, rswitch, diff --git a/include/linux/rio.h b/include/linux/rio.h index 84c9f8c5fb23..ffdfe5ad43bf 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -112,7 +112,7 @@ struct rio_dev { u16 asm_rev; u16 efptr; u32 pef; - u32 swpinfo; /* Only used for switches */ + u32 swpinfo; u32 src_ops; u32 dst_ops; u32 comp_tag; @@ -219,6 +219,7 @@ struct rio_net { /** * struct rio_switch - RIO switch info * @node: Node in global list of switches + * @rdev: Associated RIO device structure * @switchid: Switch ID that is unique across a network * @hopcount: Hopcount to this switch * @destid: Associated destid in the path @@ -234,6 +235,7 @@ struct rio_net { */ struct rio_switch { struct list_head node; + struct rio_dev *rdev; u16 switchid; u16 hopcount; u16 destid; diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index aedee0489fb4..be80b1b21815 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -33,6 +33,7 @@ #define RIO_PEF_MEMORY 0x40000000 /* [I] MMIO */ #define RIO_PEF_PROCESSOR 0x20000000 /* [I] Processor */ #define RIO_PEF_SWITCH 0x10000000 /* [I] Switch */ +#define RIO_PEF_MULTIPORT 0x08000000 /* [VI, 2.1] Multiport */ #define RIO_PEF_INB_MBOX 0x00f00000 /* [II] Mailboxes */ #define RIO_PEF_INB_MBOX0 0x00800000 /* [II] Mailbox 0 */ #define RIO_PEF_INB_MBOX1 0x00400000 /* [II] Mailbox 1 */ @@ -51,6 +52,7 @@ #define RIO_SWP_INFO_PORT_TOTAL_MASK 0x0000ff00 /* [I] Total number of ports */ #define RIO_SWP_INFO_PORT_NUM_MASK 0x000000ff /* [I] Maintenance transaction port number */ #define RIO_GET_TOTAL_PORTS(x) ((x & RIO_SWP_INFO_PORT_TOTAL_MASK) >> 8) +#define RIO_GET_PORT_NUM(x) (x & RIO_SWP_INFO_PORT_NUM_MASK) #define RIO_SRC_OPS_CAR 0x18 /* [I] Source Operations CAR */ #define RIO_SRC_OPS_READ 0x00008000 /* [I] Read op */ From 68fe4df5d21294401959fa61d5a7094705ed8f6f Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:29 -0700 Subject: [PATCH 108/139] rapidio: add relation links between RIO device structures Create back and forward links between RIO devices. These links are intended for use by error management and hot-plug extensions. Links for redundant RIO connections between switches are not set (will be fixed in a separate patch). Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 56 ++++++++++++++++---------------------- include/linux/rio.h | 4 +++ 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index d09c359844f3..d2ea01872d6a 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -444,7 +444,10 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, /* If a PE has both switch and other functions, show it as a switch */ if (rio_is_switch(rdev)) { - rswitch = kzalloc(sizeof(struct rio_switch), GFP_KERNEL); + rswitch = kzalloc(sizeof(*rswitch) + + RIO_GET_TOTAL_PORTS(rdev->swpinfo) * + sizeof(rswitch->nextdev[0]), + GFP_KERNEL); if (!rswitch) goto cleanup; rswitch->switchid = next_switchid; @@ -722,25 +725,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_get_swpinfo_tports- Gets total number of ports on the switch - * @mport: Master port to send transaction - * @destid: Destination ID associated with the switch - * @hopcount: Number of hops to the device - * - * Returns total numbers of ports implemented by the switch device. - */ -static u8 rio_get_swpinfo_tports(struct rio_mport *mport, u16 destid, - u8 hopcount) -{ - u32 result; - - rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, - &result); - - return RIO_GET_TOTAL_PORTS(result); -} - /** * rio_net_add_mport- Add a master port to a RIO network * @net: RIO network @@ -761,15 +745,16 @@ static void rio_net_add_mport(struct rio_net *net, struct rio_mport *port) * @net: RIO network being enumerated * @port: Master port to send transactions * @hopcount: Number of hops into the network + * @prev: Previous RIO device connected to the enumerated one + * @prev_port: Port on previous RIO device * * Recursively enumerates a RIO network. Transactions are sent via the * master port passed in @port. */ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, - u8 hopcount) + u8 hopcount, struct rio_dev *prev, int prev_port) { int port_num; - int num_ports; int cur_destid; int sw_destid; int sw_inport; @@ -814,6 +799,9 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rdev) { /* Add device to the global and bus/net specific list. */ list_add_tail(&rdev->net_list, &net->devices); + rdev->prev = prev; + if (prev && rio_is_switch(prev)) + prev->rswitch->nextdev[prev_port] = rdev; } else return -1; @@ -832,14 +820,14 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, rdev->rswitch->route_table[destid] = sw_inport; } - num_ports = - rio_get_swpinfo_tports(port, RIO_ANY_DESTID(port->sys_size), - hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", - rio_name(rdev), rdev->vid, rdev->did, num_ports); + rio_name(rdev), rdev->vid, rdev->did, + RIO_GET_TOTAL_PORTS(rdev->swpinfo)); sw_destid = next_destid; - for (port_num = 0; port_num < num_ports; port_num++) { + for (port_num = 0; + port_num < RIO_GET_TOTAL_PORTS(rdev->swpinfo); + port_num++) { /*Enable Input Output Port (transmitter reviever)*/ rio_enable_rx_tx_port(port, 0, RIO_ANY_DESTID(port->sys_size), @@ -864,7 +852,8 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, RIO_ANY_DESTID(port->sys_size), port_num, 0); - if (rio_enum_peer(net, port, hopcount + 1) < 0) + if (rio_enum_peer(net, port, hopcount + 1, + rdev, port_num) < 0) return -1; /* Update routing tables */ @@ -951,7 +940,6 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, u8 hopcount) { u8 port_num, route_port; - int num_ports; struct rio_dev *rdev; u16 ndestid; @@ -968,11 +956,13 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, /* Associated destid is how we accessed this switch */ rdev->rswitch->destid = destid; - num_ports = rio_get_swpinfo_tports(port, destid, hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", - rio_name(rdev), rdev->vid, rdev->did, num_ports); - for (port_num = 0; port_num < num_ports; port_num++) { + rio_name(rdev), rdev->vid, rdev->did, + RIO_GET_TOTAL_PORTS(rdev->swpinfo)); + for (port_num = 0; + port_num < RIO_GET_TOTAL_PORTS(rdev->swpinfo); + port_num++) { if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; @@ -1167,7 +1157,7 @@ int __devinit rio_enum_mport(struct rio_mport *mport) /* Enable Input Output Port (transmitter reviever) */ rio_enable_rx_tx_port(mport, 1, 0, 0, 0); - if (rio_enum_peer(net, mport, 0) < 0) { + if (rio_enum_peer(net, mport, 0, NULL, 0) < 0) { /* A higher priority host won enumeration, bail. */ printk(KERN_INFO "RIO: master port %d device has lost enumeration to a remote host\n", diff --git a/include/linux/rio.h b/include/linux/rio.h index ffdfe5ad43bf..8d9e66dc7969 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -99,6 +99,7 @@ union rio_pw_msg; * @riores: RIO resources this device owns * @pwcback: port-write callback function for this device * @destid: Network destination ID + * @prev: Previous RIO device connected to the current one */ struct rio_dev { struct list_head global_list; /* node in list of all RIO devices */ @@ -125,6 +126,7 @@ struct rio_dev { struct resource riores[RIO_MAX_DEV_RESOURCES]; int (*pwcback) (struct rio_dev *rdev, union rio_pw_msg *msg, int step); u16 destid; + struct rio_dev *prev; }; #define rio_dev_g(n) list_entry(n, struct rio_dev, global_list) @@ -232,6 +234,7 @@ struct rio_net { * @get_domain: Callback for switch-specific domain get function * @em_init: Callback for switch-specific error management initialization function * @em_handle: Callback for switch-specific error management handler function + * @nextdev: Array of per-port pointers to the next attached device */ struct rio_switch { struct list_head node; @@ -253,6 +256,7 @@ struct rio_switch { u8 *sw_domain); int (*em_init) (struct rio_dev *dev); int (*em_handle) (struct rio_dev *dev, u8 swport); + struct rio_dev *nextdev[0]; }; /* Low-level architecture-dependent routines */ From dd5648c9f53b5cbd9f948d752624400545f979fb Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:30 -0700 Subject: [PATCH 109/139] rapidio: add default handler for error-stopped state The default error-stopped state handler provides recovery mechanism as defined by RIO specification. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 220 +++++++++++++++++++++++++----- drivers/rapidio/switches/idtcps.c | 10 ++ drivers/rapidio/switches/tsi57x.c | 4 + include/linux/rio_regs.h | 8 +- 4 files changed, 207 insertions(+), 35 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 74e9d22d95fb..77bd4165238f 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -494,6 +494,148 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock) return 0; } +/** + * rio_get_input_status - Sends a Link-Request/Input-Status control symbol and + * returns link-response (if requested). + * @rdev: RIO devive to issue Input-status command + * @pnum: Device port number to issue the command + * @lnkresp: Response from a link partner + */ +static int +rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval; + int checkcount; + + if (lnkresp) { + /* Read from link maintenance response register + * to clear valid bit */ + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + ®val); + udelay(50); + } + + /* Issue Input-status command */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_REQ_CSR(pnum), + RIO_MNT_REQ_CMD_IS); + + /* Exit if the response is not expected */ + if (lnkresp == NULL) + return 0; + + checkcount = 3; + while (checkcount--) { + udelay(50); + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + ®val); + if (regval & RIO_PORT_N_MNT_RSP_RVAL) { + *lnkresp = regval; + return 0; + } + } + + return -EIO; +} + +/** + * rio_clr_err_stopped - Clears port Error-stopped states. + * @rdev: Pointer to RIO device control structure + * @pnum: Switch port number to clear errors + * @err_status: port error status (if 0 reads register from device) + */ +static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + struct rio_dev *nextdev = rdev->rswitch->nextdev[pnum]; + u32 regval; + u32 far_ackid, far_linkstat, near_ackid; + + if (err_status == 0) + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + + if (err_status & RIO_PORT_N_ERR_STS_PW_OUT_ES) { + pr_debug("RIO_EM: servicing Output Error-Stopped state\n"); + /* + * Send a Link-Request/Input-Status control symbol + */ + if (rio_get_input_status(rdev, pnum, ®val)) { + pr_debug("RIO_EM: Input-status response timeout\n"); + goto rd_err; + } + + pr_debug("RIO_EM: SP%d Input-status response=0x%08x\n", + pnum, regval); + far_ackid = (regval & RIO_PORT_N_MNT_RSP_ASTAT) >> 5; + far_linkstat = regval & RIO_PORT_N_MNT_RSP_LSTAT; + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ACK_STS_CSR(pnum), + ®val); + pr_debug("RIO_EM: SP%d_ACK_STS_CSR=0x%08x\n", pnum, regval); + near_ackid = (regval & RIO_PORT_N_ACK_INBOUND) >> 24; + pr_debug("RIO_EM: SP%d far_ackID=0x%02x far_linkstat=0x%02x" \ + " near_ackID=0x%02x\n", + pnum, far_ackid, far_linkstat, near_ackid); + + /* + * If required, synchronize ackIDs of near and + * far sides. + */ + if ((far_ackid != ((regval & RIO_PORT_N_ACK_OUTSTAND) >> 8)) || + (far_ackid != (regval & RIO_PORT_N_ACK_OUTBOUND))) { + /* Align near outstanding/outbound ackIDs with + * far inbound. + */ + rio_mport_write_config_32(mport, destid, + hopcount, rdev->phys_efptr + + RIO_PORT_N_ACK_STS_CSR(pnum), + (near_ackid << 24) | + (far_ackid << 8) | far_ackid); + /* Align far outstanding/outbound ackIDs with + * near inbound. + */ + far_ackid++; + if (nextdev) + rio_write_config_32(nextdev, + nextdev->phys_efptr + + RIO_PORT_N_ACK_STS_CSR(RIO_GET_PORT_NUM(nextdev->swpinfo)), + (far_ackid << 24) | + (near_ackid << 8) | near_ackid); + else + pr_debug("RIO_EM: Invalid nextdev pointer (NULL)\n"); + } +rd_err: + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); + } + + if ((err_status & RIO_PORT_N_ERR_STS_PW_INP_ES) && nextdev) { + pr_debug("RIO_EM: servicing Input Error-Stopped state\n"); + rio_get_input_status(nextdev, + RIO_GET_PORT_NUM(nextdev->swpinfo), NULL); + udelay(50); + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); + } + + return (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | + RIO_PORT_N_ERR_STS_PW_INP_ES)) ? 1 : 0; +} + /** * rio_inb_pwrite_handler - process inbound port-write message * @pw_msg: pointer to inbound port-write message @@ -507,7 +649,7 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) struct rio_mport *mport; u8 hopcount; u16 destid; - u32 err_status; + u32 err_status, em_perrdet, em_ltlerrdet; int rc, portnum; rdev = rio_get_comptag(pw_msg->em.comptag, NULL); @@ -524,12 +666,11 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) { u32 i; for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32);) { - pr_debug("0x%02x: %08x %08x %08x %08x", + pr_debug("0x%02x: %08x %08x %08x %08x\n", i*4, pw_msg->raw[i], pw_msg->raw[i + 1], pw_msg->raw[i + 2], pw_msg->raw[i + 3]); i += 4; } - pr_debug("\n"); } #endif @@ -573,29 +714,28 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) &err_status); pr_debug("RIO_PW: SP%d_ERR_STS_CSR=0x%08x\n", portnum, err_status); - if (pw_msg->em.errdetect) { - pr_debug("RIO_PW: RIO_EM_P%d_ERR_DETECT=0x%08x\n", - portnum, pw_msg->em.errdetect); - /* Clear EM Port N Error Detect CSR */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), 0); - } + if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { - if (pw_msg->em.ltlerrdet) { - pr_debug("RIO_PW: RIO_EM_LTL_ERR_DETECT=0x%08x\n", - pw_msg->em.ltlerrdet); - /* Clear EM L/T Layer Error Detect CSR */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); - } + if (!(rdev->rswitch->port_ok & (1 << portnum))) { + rdev->rswitch->port_ok |= (1 << portnum); + rio_set_port_lockout(rdev, portnum, 0); + /* Schedule Insertion Service */ + pr_debug("RIO_PW: Device Insertion on [%s]-P%d\n", + rio_name(rdev), portnum); + } - /* Clear Port Errors */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status & RIO_PORT_N_ERR_STS_CLR_MASK); + /* Clear error-stopped states (if reported). + * Depending on the link partner state, two attempts + * may be needed for successful recovery. + */ + if (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | + RIO_PORT_N_ERR_STS_PW_INP_ES)) { + if (rio_clr_err_stopped(rdev, portnum, err_status)) + rio_clr_err_stopped(rdev, portnum, 0); + } + } else { /* if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) */ - if (rdev->rswitch->port_ok & (1 << portnum)) { - if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) { + if (rdev->rswitch->port_ok & (1 << portnum)) { rdev->rswitch->port_ok &= ~(1 << portnum); rio_set_port_lockout(rdev, portnum, 1); @@ -608,17 +748,33 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) pr_debug("RIO_PW: Device Extraction on [%s]-P%d\n", rio_name(rdev), portnum); } - } else { - if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { - rdev->rswitch->port_ok |= (1 << portnum); - rio_set_port_lockout(rdev, portnum, 0); - - /* Schedule Insertion Service */ - pr_debug("RIO_PW: Device Insertion on [%s]-P%d\n", - rio_name(rdev), portnum); - } } + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), &em_perrdet); + if (em_perrdet) { + pr_debug("RIO_PW: RIO_EM_P%d_ERR_DETECT=0x%08x\n", + portnum, em_perrdet); + /* Clear EM Port N Error Detect CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), 0); + } + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, &em_ltlerrdet); + if (em_ltlerrdet) { + pr_debug("RIO_PW: RIO_EM_LTL_ERR_DETECT=0x%08x\n", + em_ltlerrdet); + /* Clear EM L/T Layer Error Detect CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); + } + + /* Clear remaining error bits */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), + err_status & RIO_PORT_N_ERR_STS_CLR_MASK); + /* Clear Port-Write Pending bit */ rio_mport_write_config_32(mport, destid, hopcount, rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c index 2c790c144f89..fc9f6374f759 100644 --- a/drivers/rapidio/switches/idtcps.c +++ b/drivers/rapidio/switches/idtcps.c @@ -117,6 +117,10 @@ idtcps_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount, static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) { + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); rdev->rswitch->add_entry = idtcps_route_add_entry; rdev->rswitch->get_entry = idtcps_route_get_entry; @@ -126,6 +130,12 @@ static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) rdev->rswitch->em_init = NULL; rdev->rswitch->em_handle = NULL; + if (do_enum) { + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + } + return 0; } diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index d34df722d95f..d9e94920e8b0 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -205,6 +205,10 @@ tsi57x_em_init(struct rio_dev *rdev) portnum++; } + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x9a << 8); + return 0; } diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index be80b1b21815..daa269d18e07 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -224,15 +224,17 @@ #define RIO_PORT_GEN_MASTER 0x40000000 #define RIO_PORT_GEN_DISCOVERED 0x20000000 #define RIO_PORT_N_MNT_REQ_CSR(x) (0x0040 + x*0x20) /* 0x0002 */ +#define RIO_MNT_REQ_CMD_RD 0x03 /* Reset-device command */ +#define RIO_MNT_REQ_CMD_IS 0x04 /* Input-status command */ #define RIO_PORT_N_MNT_RSP_CSR(x) (0x0044 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_MNT_RSP_RVAL 0x80000000 /* Response Valid */ #define RIO_PORT_N_MNT_RSP_ASTAT 0x000003e0 /* ackID Status */ #define RIO_PORT_N_MNT_RSP_LSTAT 0x0000001f /* Link Status */ #define RIO_PORT_N_ACK_STS_CSR(x) (0x0048 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_ACK_CLEAR 0x80000000 -#define RIO_PORT_N_ACK_INBOUND 0x1f000000 -#define RIO_PORT_N_ACK_OUTSTAND 0x00001f00 -#define RIO_PORT_N_ACK_OUTBOUND 0x0000001f +#define RIO_PORT_N_ACK_INBOUND 0x3f000000 +#define RIO_PORT_N_ACK_OUTSTAND 0x00003f00 +#define RIO_PORT_N_ACK_OUTBOUND 0x0000003f #define RIO_PORT_N_ERR_STS_CSR(x) (0x0058 + x*0x20) #define RIO_PORT_N_ERR_STS_PW_OUT_ES 0x00010000 /* Output Error-stopped */ #define RIO_PORT_N_ERR_STS_PW_INP_ES 0x00000100 /* Input Error-stopped */ From ac38d7232dfa3c71b129bab3318ba327bbcf8405 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:31 -0700 Subject: [PATCH 110/139] rapidio: modify sysfs initialization for switches 1. Change to create attribute "routes" only for switches. 2. Add a switch-specific callback to create/remove proprietary attributes. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-sysfs.c | 26 +++++++++++++++++++------- include/linux/rio.h | 6 ++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 00b475658356..137ed93ee33f 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -40,9 +40,6 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch char *str = buf; int i; - if (!rdev->rswitch) - goto out; - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size); i++) { if (rdev->rswitch->route_table[i] == RIO_INVALID_ROUTE) @@ -52,7 +49,6 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch rdev->rswitch->route_table[i]); } - out: return (str - buf); } @@ -63,10 +59,11 @@ struct device_attribute rio_dev_attrs[] = { __ATTR_RO(asm_did), __ATTR_RO(asm_vid), __ATTR_RO(asm_rev), - __ATTR_RO(routes), __ATTR_NULL, }; +static DEVICE_ATTR(routes, S_IRUGO, routes_show, NULL); + static ssize_t rio_read_config(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -218,7 +215,17 @@ int rio_create_sysfs_dev_files(struct rio_dev *rdev) { int err = 0; - err = sysfs_create_bin_file(&rdev->dev.kobj, &rio_config_attr); + err = device_create_bin_file(&rdev->dev, &rio_config_attr); + + if (!err && rdev->rswitch) { + err = device_create_file(&rdev->dev, &dev_attr_routes); + if (!err && rdev->rswitch->sw_sysfs) + err = rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_CREATE); + } + + if (err) + pr_warning("RIO: Failed to create attribute file(s) for %s\n", + rio_name(rdev)); return err; } @@ -231,5 +238,10 @@ int rio_create_sysfs_dev_files(struct rio_dev *rdev) */ void rio_remove_sysfs_dev_files(struct rio_dev *rdev) { - sysfs_remove_bin_file(&rdev->dev.kobj, &rio_config_attr); + device_remove_bin_file(&rdev->dev, &rio_config_attr); + if (rdev->rswitch) { + device_remove_file(&rdev->dev, &dev_attr_routes); + if (rdev->rswitch->sw_sysfs) + rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_REMOVE); + } } diff --git a/include/linux/rio.h b/include/linux/rio.h index 8d9e66dc7969..4fa5e3d2b117 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -218,6 +218,10 @@ struct rio_net { unsigned char id; /* RIO network ID */ }; +/* Definitions used by switch sysfs initialization callback */ +#define RIO_SW_SYSFS_CREATE 1 /* Create switch attributes */ +#define RIO_SW_SYSFS_REMOVE 0 /* Remove switch attributes */ + /** * struct rio_switch - RIO switch info * @node: Node in global list of switches @@ -234,6 +238,7 @@ struct rio_net { * @get_domain: Callback for switch-specific domain get function * @em_init: Callback for switch-specific error management initialization function * @em_handle: Callback for switch-specific error management handler function + * @sw_sysfs: Callback that initializes switch-specific sysfs attributes * @nextdev: Array of per-port pointers to the next attached device */ struct rio_switch { @@ -256,6 +261,7 @@ struct rio_switch { u8 *sw_domain); int (*em_init) (struct rio_dev *dev); int (*em_handle) (struct rio_dev *dev, u8 swport); + int (*sw_sysfs) (struct rio_dev *dev, int create); struct rio_dev *nextdev[0]; }; From 6429cd49f45450cd77a57b70b0dfa98fe2794da0 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:32 -0700 Subject: [PATCH 111/139] rapidio: add handling of orphan port-write message Add check for access to port-write (PW) message source device before processing the PW message. If source RIO device is not available (power down or RIO link failure) trace back to a last available switch/port on the PW message route and service failure at that point. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 113 ++++++++++++++++++++++++++++++++++++++++-- drivers/rapidio/rio.h | 2 + 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 77bd4165238f..aefc2a0004d4 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -494,6 +494,92 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock) return 0; } +/** + * rio_chk_dev_route - Validate route to the specified device. + * @rdev: RIO device failed to respond + * @nrdev: Last active device on the route to rdev + * @npnum: nrdev's port number on the route to rdev + * + * Follows a route to the specified RIO device to determine the last available + * device (and corresponding RIO port) on the route. + */ +static int +rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) +{ + u32 result; + int p_port, rc = -EIO; + struct rio_dev *prev = NULL; + + /* Find switch with failed RIO link */ + while (rdev->prev && (rdev->prev->pef & RIO_PEF_SWITCH)) { + if (!rio_read_config_32(rdev->prev, RIO_DEV_ID_CAR, &result)) { + prev = rdev->prev; + break; + } + rdev = rdev->prev; + } + + if (prev == NULL) + goto err_out; + + /* Find port with failed RIO link */ + for (p_port = 0; + p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo); p_port++) + if (prev->rswitch->nextdev[p_port] == rdev) + break; + + if (p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo)) { + pr_debug("RIO: link failed on [%s]-P%d\n", + rio_name(prev), p_port); + *nrdev = prev; + *npnum = p_port; + rc = 0; + } else + pr_debug("RIO: failed to trace route to %s\n", rio_name(prev)); +err_out: + return rc; +} + +/** + * rio_mport_chk_dev_access - Validate access to the specified device. + * @mport: Master port to send transactions + * @destid: Device destination ID in network + * @hopcount: Number of hops into the network + */ +static int +rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid, u8 hopcount) +{ + int i = 0; + u32 tmp; + + while (rio_mport_read_config_32(mport, destid, hopcount, + RIO_DEV_ID_CAR, &tmp)) { + i++; + if (i == RIO_MAX_CHK_RETRY) + return -EIO; + mdelay(1); + } + + return 0; +} + +/** + * rio_chk_dev_access - Validate access to the specified device. + * @rdev: Pointer to RIO device control structure + */ +static int rio_chk_dev_access(struct rio_dev *rdev) +{ + u8 hopcount = 0xff; + u16 destid = rdev->destid; + + if (rdev->rswitch) { + destid = rdev->rswitch->destid; + hopcount = rdev->rswitch->hopcount; + } + + return rio_mport_chk_dev_access(rdev->net->hport, destid, hopcount); +} + /** * rio_get_input_status - Sends a Link-Request/Input-Status control symbol and * returns link-response (if requested). @@ -654,8 +740,8 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) rdev = rio_get_comptag(pw_msg->em.comptag, NULL); if (rdev == NULL) { - /* Someting bad here (probably enumeration error) */ - pr_err("RIO: %s No matching device for CTag 0x%08x\n", + /* Device removed or enumeration error */ + pr_debug("RIO: %s No matching device for CTag 0x%08x\n", __func__, pw_msg->em.comptag); return -EIO; } @@ -686,6 +772,26 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) return 0; } + portnum = pw_msg->em.is_port & 0xFF; + + /* Check if device and route to it are functional: + * Sometimes devices may send PW message(s) just before being + * powered down (or link being lost). + */ + if (rio_chk_dev_access(rdev)) { + pr_debug("RIO: device access failed - get link partner\n"); + /* Scan route to the device and identify failed link. + * This will replace device and port reported in PW message. + * PW message should not be used after this point. + */ + if (rio_chk_dev_route(rdev, &rdev, &portnum)) { + pr_err("RIO: Route trace for %s failed\n", + rio_name(rdev)); + return -EIO; + } + pw_msg = NULL; + } + /* For End-point devices processing stops here */ if (!(rdev->pef & RIO_PEF_SWITCH)) return 0; @@ -703,9 +809,6 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) /* * Process the port-write notification from switch */ - - portnum = pw_msg->em.is_port & 0xFF; - if (rdev->rswitch->em_handle) rdev->rswitch->em_handle(rdev, portnum); diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index f27b7a9c47d2..bc71ba1d239e 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -14,6 +14,8 @@ #include #include +#define RIO_MAX_CHK_RETRY 3 + /* Functions internal to the RIO core code */ extern u32 rio_mport_get_feature(struct rio_mport *mport, int local, u16 destid, From e274e0ed0a2ac31d5eaf7c891e4e1d99197517b2 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:32 -0700 Subject: [PATCH 112/139] rapidio: add device access check into the enumeration Add explicit device access check before performing device enumeration. This gives a chance to clear possible link error conditions by issuing safe maintenance read request(s). Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 6 ++++++ drivers/rapidio/rio.c | 2 +- drivers/rapidio/rio.h | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index d2ea01872d6a..e3efdf93df5a 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -762,6 +762,12 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, u16 destid; int tmp; + if (rio_mport_chk_dev_access(port, + RIO_ANY_DESTID(port->sys_size), hopcount)) { + pr_debug("RIO: device access check failed\n"); + return -1; + } + if (rio_get_host_deviceid_lock(port, hopcount) == port->host_deviceid) { pr_debug("RIO: PE already discovered by this host\n"); /* diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index aefc2a0004d4..fa5e3cbe4c83 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -546,7 +546,7 @@ err_out: * @destid: Device destination ID in network * @hopcount: Number of hops into the network */ -static int +int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid, u8 hopcount) { int i = 0; diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index bc71ba1d239e..d249a1205c7d 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -24,6 +24,8 @@ extern u32 rio_mport_get_physefb(struct rio_mport *port, int local, u16 destid, u8 hopcount); extern u32 rio_mport_get_efb(struct rio_mport *port, int local, u16 destid, u8 hopcount, u32 from); +extern int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid, + u8 hopcount); extern int rio_create_sysfs_dev_files(struct rio_dev *rdev); extern int rio_enum_mport(struct rio_mport *mport); extern int rio_disc_mport(struct rio_mport *mport); From a3725c45c114bd06e091802f90533332d1e93819 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:33 -0700 Subject: [PATCH 113/139] rapidio: add support for IDT CPS Gen2 switches Add the RIO switch driver and definitions for IDT CPS-1848 and CPS-1616 Gen2 devices. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/Kconfig | 7 + drivers/rapidio/switches/Makefile | 1 + drivers/rapidio/switches/idt_gen2.c | 447 ++++++++++++++++++++++++++++ include/linux/rio_ids.h | 2 + include/linux/rio_regs.h | 5 + 5 files changed, 462 insertions(+) create mode 100644 drivers/rapidio/switches/idt_gen2.c diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig index 2b4e9b2b6631..f47fee5d4563 100644 --- a/drivers/rapidio/switches/Kconfig +++ b/drivers/rapidio/switches/Kconfig @@ -20,6 +20,13 @@ config RAPIDIO_TSI568 ---help--- Includes support for IDT Tsi568 serial RapidIO switch. +config RAPIDIO_CPS_GEN2 + bool "IDT CPS Gen.2 SRIO switch support" + depends on RAPIDIO + default n + ---help--- + Includes support for ITD CPS Gen.2 serial RapidIO switches. + config RAPIDIO_TSI500 bool "Tsi500 Parallel RapidIO switch support" depends on RAPIDIO diff --git a/drivers/rapidio/switches/Makefile b/drivers/rapidio/switches/Makefile index fe4adc3e8d5f..48d67a6b98c8 100644 --- a/drivers/rapidio/switches/Makefile +++ b/drivers/rapidio/switches/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_RAPIDIO_TSI57X) += tsi57x.o obj-$(CONFIG_RAPIDIO_CPS_XX) += idtcps.o obj-$(CONFIG_RAPIDIO_TSI568) += tsi568.o obj-$(CONFIG_RAPIDIO_TSI500) += tsi500.o +obj-$(CONFIG_RAPIDIO_CPS_GEN2) += idt_gen2.o ifeq ($(CONFIG_RAPIDIO_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c new file mode 100644 index 000000000000..0bb871cb5c40 --- /dev/null +++ b/drivers/rapidio/switches/idt_gen2.c @@ -0,0 +1,447 @@ +/* + * IDT CPS Gen.2 Serial RapidIO switch family support + * + * Copyright 2010 Integrated Device Technology, Inc. + * Alexandre Bounine + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include "../rio.h" + +#define LOCAL_RTE_CONF_DESTID_SEL 0x010070 +#define LOCAL_RTE_CONF_DESTID_SEL_PSEL 0x0000001f + +#define IDT_LT_ERR_REPORT_EN 0x03100c + +#define IDT_PORT_ERR_REPORT_EN(n) (0x031044 + (n)*0x40) +#define IDT_PORT_ERR_REPORT_EN_BC 0x03ff04 + +#define IDT_PORT_ISERR_REPORT_EN(n) (0x03104C + (n)*0x40) +#define IDT_PORT_ISERR_REPORT_EN_BC 0x03ff0c +#define IDT_PORT_INIT_TX_ACQUIRED 0x00000020 + +#define IDT_LANE_ERR_REPORT_EN(n) (0x038010 + (n)*0x100) +#define IDT_LANE_ERR_REPORT_EN_BC 0x03ff10 + +#define IDT_DEV_CTRL_1 0xf2000c +#define IDT_DEV_CTRL_1_GENPW 0x02000000 +#define IDT_DEV_CTRL_1_PRSTBEH 0x00000001 + +#define IDT_CFGBLK_ERR_CAPTURE_EN 0x020008 +#define IDT_CFGBLK_ERR_REPORT 0xf20014 +#define IDT_CFGBLK_ERR_REPORT_GENPW 0x00000002 + +#define IDT_AUX_PORT_ERR_CAP_EN 0x020000 +#define IDT_AUX_ERR_REPORT_EN 0xf20018 +#define IDT_AUX_PORT_ERR_LOG_I2C 0x00000002 +#define IDT_AUX_PORT_ERR_LOG_JTAG 0x00000001 + +#define IDT_ISLTL_ADDRESS_CAP 0x021014 + +#define IDT_RIO_DOMAIN 0xf20020 +#define IDT_RIO_DOMAIN_MASK 0x000000ff + +#define IDT_PW_INFO_CSR 0xf20024 + +#define IDT_SOFT_RESET 0xf20040 +#define IDT_SOFT_RESET_REQ 0x00030097 + +#define IDT_I2C_MCTRL 0xf20050 +#define IDT_I2C_MCTRL_GENPW 0x04000000 + +#define IDT_JTAG_CTRL 0xf2005c +#define IDT_JTAG_CTRL_GENPW 0x00000002 + +#define IDT_LANE_CTRL(n) (0xff8000 + (n)*0x100) +#define IDT_LANE_CTRL_BC 0xffff00 +#define IDT_LANE_CTRL_GENPW 0x00200000 +#define IDT_LANE_DFE_1_BC 0xffff18 +#define IDT_LANE_DFE_2_BC 0xffff1c + +#define IDT_PORT_OPS(n) (0xf40004 + (n)*0x100) +#define IDT_PORT_OPS_GENPW 0x08000000 +#define IDT_PORT_OPS_PL_ELOG 0x00000040 +#define IDT_PORT_OPS_LL_ELOG 0x00000020 +#define IDT_PORT_OPS_LT_ELOG 0x00000010 +#define IDT_PORT_OPS_BC 0xf4ff04 + +#define IDT_PORT_ISERR_DET(n) (0xf40008 + (n)*0x100) + +#define IDT_ERR_CAP 0xfd0000 +#define IDT_ERR_CAP_LOG_OVERWR 0x00000004 + +#define IDT_ERR_RD 0xfd0004 + +#define IDT_DEFAULT_ROUTE 0xde +#define IDT_NO_ROUTE 0xdf + +static int +idtg2_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 route_port) +{ + /* + * Select routing table to update + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + /* + * Program destination port for the specified destID + */ + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, + (u32)route_destid); + + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, + (u32)route_port); + udelay(10); + + return 0; +} + +static int +idtg2_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 *route_port) +{ + u32 result; + + /* + * Select routing table to read + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, + route_destid); + + rio_mport_read_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, &result); + + if (IDT_DEFAULT_ROUTE == (u8)result || IDT_NO_ROUTE == (u8)result) + *route_port = RIO_INVALID_ROUTE; + else + *route_port = (u8)result; + + return 0; +} + +static int +idtg2_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table) +{ + u32 i; + + /* + * Select routing table to read + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + for (i = RIO_STD_RTE_CONF_EXTCFGEN; + i <= (RIO_STD_RTE_CONF_EXTCFGEN | 0xff);) { + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, i); + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, + (IDT_DEFAULT_ROUTE << 24) | (IDT_DEFAULT_ROUTE << 16) | + (IDT_DEFAULT_ROUTE << 8) | IDT_DEFAULT_ROUTE); + i += 4; + } + + return 0; +} + + +static int +idtg2_set_domain(struct rio_mport *mport, u16 destid, u8 hopcount, + u8 sw_domain) +{ + /* + * Switch domain configuration operates only at global level + */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_RIO_DOMAIN, (u32)sw_domain); + return 0; +} + +static int +idtg2_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount, + u8 *sw_domain) +{ + u32 regval; + + /* + * Switch domain configuration operates only at global level + */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_RIO_DOMAIN, ®val); + + *sw_domain = (u8)(regval & 0xff); + + return 0; +} + +static int +idtg2_em_init(struct rio_dev *rdev) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval; + int i, tmp; + + /* + * This routine performs device-specific initialization only. + * All standard EM configuration should be performed at upper level. + */ + + pr_debug("RIO: %s [%d:%d]\n", __func__, destid, hopcount); + + /* Set Port-Write info CSR: PRIO=3 and CRF=1 */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PW_INFO_CSR, 0x0000e000); + + /* + * Configure LT LAYER error reporting. + */ + + /* Enable standard (RIO.p8) error reporting */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LT_ERR_REPORT_EN, + REM_LTL_ERR_ILLTRAN | REM_LTL_ERR_UNSOLR | + REM_LTL_ERR_UNSUPTR); + + /* Use Port-Writes for LT layer error reporting. + * Enable per-port reset + */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_DEV_CTRL_1, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_DEV_CTRL_1, + regval | IDT_DEV_CTRL_1_GENPW | IDT_DEV_CTRL_1_PRSTBEH); + + /* + * Configure PORT error reporting. + */ + + /* Report all RIO.p8 errors supported by device */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ERR_REPORT_EN_BC, 0x807e8037); + + /* Configure reporting of implementation specific errors/events */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_REPORT_EN_BC, IDT_PORT_INIT_TX_ACQUIRED); + + /* Use Port-Writes for port error reporting and enable error logging */ + tmp = RIO_GET_TOTAL_PORTS(rdev->swpinfo); + for (i = 0; i < tmp; i++) { + rio_mport_read_config_32(mport, destid, hopcount, + IDT_PORT_OPS(i), ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_OPS(i), regval | IDT_PORT_OPS_GENPW | + IDT_PORT_OPS_PL_ELOG | + IDT_PORT_OPS_LL_ELOG | + IDT_PORT_OPS_LT_ELOG); + } + /* Overwrite error log if full */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_ERR_CAP, IDT_ERR_CAP_LOG_OVERWR); + + /* + * Configure LANE error reporting. + */ + + /* Disable line error reporting */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LANE_ERR_REPORT_EN_BC, 0); + + /* Use Port-Writes for lane error reporting (when enabled) + * (do per-lane update because lanes may have different configuration) + */ + tmp = (rdev->did == RIO_DID_IDTCPS1848) ? 48 : 16; + for (i = 0; i < tmp; i++) { + rio_mport_read_config_32(mport, destid, hopcount, + IDT_LANE_CTRL(i), ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LANE_CTRL(i), regval | IDT_LANE_CTRL_GENPW); + } + + /* + * Configure AUX error reporting. + */ + + /* Disable JTAG and I2C Error capture */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_AUX_PORT_ERR_CAP_EN, 0); + + /* Disable JTAG and I2C Error reporting/logging */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_AUX_ERR_REPORT_EN, 0); + + /* Disable Port-Write notification from JTAG */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_JTAG_CTRL, 0); + + /* Disable Port-Write notification from I2C */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_I2C_MCTRL, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_I2C_MCTRL, + regval & ~IDT_I2C_MCTRL_GENPW); + + /* + * Configure CFG_BLK error reporting. + */ + + /* Disable Configuration Block error capture */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_CAPTURE_EN, 0); + + /* Disable Port-Writes for Configuration Block error reporting */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_REPORT, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_REPORT, + regval & ~IDT_CFGBLK_ERR_REPORT_GENPW); + + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + + return 0; +} + +static int +idtg2_em_handler(struct rio_dev *rdev, u8 portnum) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval, em_perrdet, em_ltlerrdet; + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, &em_ltlerrdet); + if (em_ltlerrdet) { + /* Service Logical/Transport Layer Error(s) */ + if (em_ltlerrdet & REM_LTL_ERR_IMPSPEC) { + /* Implementation specific error reported */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_ISLTL_ADDRESS_CAP, ®val); + + pr_debug("RIO: %s Implementation Specific LTL errors" \ + " 0x%x @(0x%x)\n", + rio_name(rdev), em_ltlerrdet, regval); + + /* Clear implementation specific address capture CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_ISLTL_ADDRESS_CAP, 0); + + } + } + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), &em_perrdet); + if (em_perrdet) { + /* Service Port-Level Error(s) */ + if (em_perrdet & REM_PED_IMPL_SPEC) { + /* Implementation Specific port error reported */ + + /* Get IS errors reported */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_DET(portnum), ®val); + + pr_debug("RIO: %s Implementation Specific Port" \ + " errors 0x%x\n", rio_name(rdev), regval); + + /* Clear all implementation specific events */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_DET(portnum), 0); + } + } + + return 0; +} + +static ssize_t +idtg2_show_errlog(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct rio_dev *rdev = to_rio_dev(dev); + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + ssize_t len = 0; + u32 regval; + + while (!rio_mport_read_config_32(mport, destid, hopcount, + IDT_ERR_RD, ®val)) { + if (!regval) /* 0 = end of log */ + break; + len += snprintf(buf + len, PAGE_SIZE - len, + "%08x\n", regval); + if (len >= (PAGE_SIZE - 10)) + break; + } + + return len; +} + +static DEVICE_ATTR(errlog, S_IRUGO, idtg2_show_errlog, NULL); + +static int idtg2_sysfs(struct rio_dev *rdev, int create) +{ + struct device *dev = &rdev->dev; + int err = 0; + + if (create == RIO_SW_SYSFS_CREATE) { + /* Initialize sysfs entries */ + err = device_create_file(dev, &dev_attr_errlog); + if (err) + dev_err(dev, "Unable create sysfs errlog file\n"); + } else + device_remove_file(dev, &dev_attr_errlog); + + return err; +} + +static int idtg2_switch_init(struct rio_dev *rdev, int do_enum) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + rdev->rswitch->add_entry = idtg2_route_add_entry; + rdev->rswitch->get_entry = idtg2_route_get_entry; + rdev->rswitch->clr_table = idtg2_route_clr_table; + rdev->rswitch->set_domain = idtg2_set_domain; + rdev->rswitch->get_domain = idtg2_get_domain; + rdev->rswitch->em_init = idtg2_em_init; + rdev->rswitch->em_handle = idtg2_em_handler; + rdev->rswitch->sw_sysfs = idtg2_sysfs; + + return 0; +} + +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init); +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init); diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index db50e1c288b7..ee7b6ada188f 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -34,5 +34,7 @@ #define RIO_DID_IDTCPS16 0x035b #define RIO_DID_IDTCPS6Q 0x035f #define RIO_DID_IDTCPS10Q 0x035e +#define RIO_DID_IDTCPS1848 0x0374 +#define RIO_DID_IDTCPS1616 0x0379 #endif /* LINUX_RIO_IDS_H */ diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index daa269d18e07..a18b2e22aa1d 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -161,6 +161,7 @@ #define RIO_COMPONENT_TAG_CSR 0x6c /* [III] Component Tag CSR */ #define RIO_STD_RTE_CONF_DESTID_SEL_CSR 0x70 +#define RIO_STD_RTE_CONF_EXTCFGEN 0x80000000 #define RIO_STD_RTE_CONF_PORT_SEL_CSR 0x74 #define RIO_STD_RTE_DEFAULT_PORT 0x78 @@ -265,6 +266,10 @@ #define RIO_EM_EFB_HEADER 0x000 /* Error Management Extensions Block Header */ #define RIO_EM_LTL_ERR_DETECT 0x008 /* Logical/Transport Layer Error Detect CSR */ #define RIO_EM_LTL_ERR_EN 0x00c /* Logical/Transport Layer Error Enable CSR */ +#define REM_LTL_ERR_ILLTRAN 0x08000000 /* Illegal Transaction decode */ +#define REM_LTL_ERR_UNSOLR 0x00800000 /* Unsolicited Response */ +#define REM_LTL_ERR_UNSUPTR 0x00400000 /* Unsupported Transaction */ +#define REM_LTL_ERR_IMPSPEC 0x000000ff /* Implementation Specific */ #define RIO_EM_LTL_HIADDR_CAP 0x010 /* Logical/Transport Layer High Address Capture CSR */ #define RIO_EM_LTL_ADDR_CAP 0x014 /* Logical/Transport Layer Address Capture CSR */ #define RIO_EM_LTL_DEVID_CAP 0x018 /* Logical/Transport Layer Device ID Capture CSR */ From af84ca38aff94061dd0711edbb99b0900a9c28fd Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:34 -0700 Subject: [PATCH 114/139] rapidio: add handling of redundant routes Detects RIO link to the already enumerated device and properly sets links between device objects. Changes to the enumeration/discovery logic: 1. Use Master Enable bit to signal end of the enumeration - agents may start their discovery process as soon as they see this bit set (Component Tag register was used before for this purpose). 2. Enumerator sets Component Tag (!= 0) immediately during device setup. This allows to identify the device if the redundant route exists in a RIO system. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 8 ++++ drivers/rapidio/rio-scan.c | 88 ++++++++++++++++++----------------- drivers/rapidio/rio.c | 16 +++---- drivers/rapidio/rio.h | 1 + include/linux/rio.h | 2 + 5 files changed, 64 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index ed2ec7154917..9725369d432a 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -50,6 +50,7 @@ #define RIO_ATMU_REGS_OFFSET 0x10c00 #define RIO_P_MSG_REGS_OFFSET 0x11000 #define RIO_S_MSG_REGS_OFFSET 0x13000 +#define RIO_GCCSR 0x13c #define RIO_ESCSR 0x158 #define RIO_CCSR 0x15c #define RIO_LTLEDCSR 0x0608 @@ -1471,6 +1472,7 @@ int fsl_rio_setup(struct platform_device *dev) port->host_deviceid = fsl_rio_get_hdid(port->id); port->priv = priv; + port->phys_efptr = 0x100; rio_register_mport(port); priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); @@ -1518,6 +1520,12 @@ int fsl_rio_setup(struct platform_device *dev) dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", port->sys_size ? 65536 : 256); + if (port->host_deviceid >= 0) + out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST | + RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED); + else + out_be32(priv->regs_win + RIO_GCCSR, 0x00000000); + priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win + RIO_ATMU_REGS_OFFSET); priv->maint_atmu_regs = priv->atmu_regs + 1; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index e3efdf93df5a..1eb82c4c712e 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -48,7 +48,7 @@ DEFINE_SPINLOCK(rio_global_list_lock); static int next_destid = 0; static int next_switchid = 0; static int next_net = 0; -static int next_comptag; +static int next_comptag = 1; static struct timer_list rio_enum_timer = TIMER_INITIALIZER(rio_enum_timeout, 0, 0); @@ -121,27 +121,6 @@ static int rio_clear_locks(struct rio_mport *port) u32 result; int ret = 0; - /* Assign component tag to all devices */ - next_comptag = 1; - rio_local_write_config_32(port, RIO_COMPONENT_TAG_CSR, next_comptag++); - - list_for_each_entry(rdev, &rio_devices, global_list) { - /* Mark device as discovered */ - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, - &result); - rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, - result | RIO_PORT_GEN_DISCOVERED); - - rio_write_config_32(rdev, RIO_COMPONENT_TAG_CSR, next_comptag); - rdev->comp_tag = next_comptag++; - if (next_comptag >= 0x10000) { - pr_err("RIO: Component Tag Counter Overflow\n"); - break; - } - } - /* Release host device id locks */ rio_local_write_config_32(port, RIO_HOST_DID_LOCK_CSR, port->host_deviceid); @@ -162,6 +141,15 @@ static int rio_clear_locks(struct rio_mport *port) rdev->vid, rdev->did); ret = -EINVAL; } + + /* Mark device as discovered and enable master */ + rio_read_config_32(rdev, + rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, + &result); + result |= RIO_PORT_GEN_DISCOVERED | RIO_PORT_GEN_MASTER; + rio_write_config_32(rdev, + rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, + result); } return ret; @@ -430,6 +418,17 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, rio_mport_read_config_32(port, destid, hopcount, RIO_DST_OPS_CAR, &rdev->dst_ops); + if (do_enum) { + /* Assign component tag to device */ + if (next_comptag >= 0x10000) { + pr_err("RIO: Component Tag Counter Overflow\n"); + goto cleanup; + } + rio_mport_write_config_32(port, destid, hopcount, + RIO_COMPONENT_TAG_CSR, next_comptag); + rdev->comp_tag = next_comptag++; + } + if (rio_device_has_destid(port, rdev->src_ops, rdev->dst_ops)) { if (do_enum) { rio_set_device_id(port, destid, hopcount, next_destid); @@ -725,21 +724,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_net_add_mport- Add a master port to a RIO network - * @net: RIO network - * @port: Master port to add - * - * Adds a master port to the network list of associated master - * ports.. - */ -static void rio_net_add_mport(struct rio_net *net, struct rio_mport *port) -{ - spin_lock(&rio_global_list_lock); - list_add_tail(&port->nnode, &net->mports); - spin_unlock(&rio_global_list_lock); -} - /** * rio_enum_peer- Recursively enumerate a RIO network through a master port * @net: RIO network being enumerated @@ -760,6 +744,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, int sw_inport; struct rio_dev *rdev; u16 destid; + u32 regval; int tmp; if (rio_mport_chk_dev_access(port, @@ -772,9 +757,21 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, pr_debug("RIO: PE already discovered by this host\n"); /* * Already discovered by this host. Add it as another - * master port for the current network. + * link to the existing device. */ - rio_net_add_mport(net, port); + rio_mport_read_config_32(port, RIO_ANY_DESTID(port->sys_size), + hopcount, RIO_COMPONENT_TAG_CSR, ®val); + + if (regval) { + rdev = rio_get_comptag((regval & 0xffff), NULL); + + if (rdev && prev && rio_is_switch(prev)) { + pr_debug("RIO: redundant path to %s\n", + rio_name(rdev)); + prev->rswitch->nextdev[prev_port] = rdev; + } + } + return 0; } @@ -925,10 +922,11 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, */ static int rio_enum_complete(struct rio_mport *port) { - u32 tag_csr; + u32 regval; - rio_local_read_config_32(port, RIO_COMPONENT_TAG_CSR, &tag_csr); - return (tag_csr & 0xffff) ? 1 : 0; + rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR, + ®val); + return (regval & RIO_PORT_GEN_MASTER) ? 1 : 0; } /** @@ -991,6 +989,8 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, break; } + if (ndestid == RIO_ANY_DESTID(port->sys_size)) + continue; rio_unlock_device(port, destid, hopcount); if (rio_disc_peer (net, port, ndestid, hopcount + 1) < 0) @@ -1163,6 +1163,10 @@ int __devinit rio_enum_mport(struct rio_mport *mport) /* Enable Input Output Port (transmitter reviever) */ rio_enable_rx_tx_port(mport, 1, 0, 0, 0); + /* Set component tag for host */ + rio_local_write_config_32(mport, RIO_COMPONENT_TAG_CSR, + next_comptag++); + if (rio_enum_peer(net, mport, 0, NULL, 0) < 0) { /* A higher priority host won enumeration, bail. */ printk(KERN_INFO diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index fa5e3cbe4c83..7f18a65c4ed0 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -443,7 +443,7 @@ rio_mport_get_physefb(struct rio_mport *port, int local, * @from is not %NULL, searches continue from next device on the global * list. */ -static struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from) +struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from) { struct list_head *n; struct rio_dev *rdev; @@ -507,7 +507,7 @@ static int rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) { u32 result; - int p_port, rc = -EIO; + int p_port, dstid, rc = -EIO; struct rio_dev *prev = NULL; /* Find switch with failed RIO link */ @@ -522,20 +522,18 @@ rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) if (prev == NULL) goto err_out; - /* Find port with failed RIO link */ - for (p_port = 0; - p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo); p_port++) - if (prev->rswitch->nextdev[p_port] == rdev) - break; + dstid = (rdev->pef & RIO_PEF_SWITCH) ? + rdev->rswitch->destid : rdev->destid; + p_port = prev->rswitch->route_table[dstid]; - if (p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo)) { + if (p_port != RIO_INVALID_ROUTE) { pr_debug("RIO: link failed on [%s]-P%d\n", rio_name(prev), p_port); *nrdev = prev; *npnum = p_port; rc = 0; } else - pr_debug("RIO: failed to trace route to %s\n", rio_name(prev)); + pr_debug("RIO: failed to trace route to %s\n", rio_name(rdev)); err_out: return rc; } diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index d249a1205c7d..b1af414f15e6 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -38,6 +38,7 @@ extern int rio_std_route_get_entry(struct rio_mport *mport, u16 destid, extern int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, u16 table); extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock); +extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from); /* Structures internal to the RIO core code */ extern struct device_attribute rio_dev_attrs[]; diff --git a/include/linux/rio.h b/include/linux/rio.h index 4fa5e3d2b117..0bed941f9b13 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -177,6 +177,7 @@ enum rio_phy_type { * @index: Port index, unique among all port interfaces of the same type * @sys_size: RapidIO common transport system size * @phy_type: RapidIO phy type + * @phys_efptr: RIO port extended features pointer * @name: Port name string * @priv: Master port private data */ @@ -198,6 +199,7 @@ struct rio_mport { * 1 - Large size, 65536 devices. */ enum rio_phy_type phy_type; /* RapidIO phy type */ + u32 phys_efptr; unsigned char name[40]; void *priv; /* Master port private data */ }; From 388c45ccfaeec68e334ad79edeb0b5b0a43197ff Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:35 -0700 Subject: [PATCH 115/139] rapidio: fix IDLE2 bits corruption RapidIO spec v.2.1 adds Idle Sequence 2 into LP-Serial Physical Layer. The fix ensures that corresponding bits are not corrupted during error handling. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 9 ++------- include/linux/rio_regs.h | 3 +-- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 7f18a65c4ed0..68cf0c99138a 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -871,15 +871,10 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); } - /* Clear remaining error bits */ + /* Clear remaining error bits and Port-Write Pending bit */ rio_mport_write_config_32(mport, destid, hopcount, rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status & RIO_PORT_N_ERR_STS_CLR_MASK); - - /* Clear Port-Write Pending bit */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - RIO_PORT_N_ERR_STS_PW_PEND); + err_status); return 0; } diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index a18b2e22aa1d..d63dcbaea169 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -229,7 +229,7 @@ #define RIO_MNT_REQ_CMD_IS 0x04 /* Input-status command */ #define RIO_PORT_N_MNT_RSP_CSR(x) (0x0044 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_MNT_RSP_RVAL 0x80000000 /* Response Valid */ -#define RIO_PORT_N_MNT_RSP_ASTAT 0x000003e0 /* ackID Status */ +#define RIO_PORT_N_MNT_RSP_ASTAT 0x000007e0 /* ackID Status */ #define RIO_PORT_N_MNT_RSP_LSTAT 0x0000001f /* Link Status */ #define RIO_PORT_N_ACK_STS_CSR(x) (0x0048 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_ACK_CLEAR 0x80000000 @@ -243,7 +243,6 @@ #define RIO_PORT_N_ERR_STS_PORT_ERR 0x00000004 #define RIO_PORT_N_ERR_STS_PORT_OK 0x00000002 #define RIO_PORT_N_ERR_STS_PORT_UNINIT 0x00000001 -#define RIO_PORT_N_ERR_STS_CLR_MASK 0x07120204 #define RIO_PORT_N_CTL_CSR(x) (0x005c + x*0x20) #define RIO_PORT_N_CTL_PWIDTH 0xc0000000 #define RIO_PORT_N_CTL_PWIDTH_1 0x00000000 From 2d4da0fc0940625d59f014c3f85370f9b60ba7f4 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:35 -0700 Subject: [PATCH 116/139] rapidio: fix maximum port number in tsi57x EM initialization Replace hardcoded maximum port number with actual reported number of switch ports. Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/tsi57x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index d9e94920e8b0..2003fb63c404 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -166,7 +166,8 @@ tsi57x_em_init(struct rio_dev *rdev) pr_debug("TSI578 %s [%d:%d]\n", __func__, destid, hopcount); - for (portnum = 0; portnum < 16; portnum++) { + for (portnum = 0; + portnum < RIO_GET_TOTAL_PORTS(rdev->swpinfo); portnum++) { /* Make sure that Port-Writes are enabled (for all ports) */ rio_mport_read_config_32(mport, destid, hopcount, TSI578_SP_MODE(portnum), ®val); From 1548bf316f18090576f611c7318447c512ddbdd2 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:36 -0700 Subject: [PATCH 117/139] rapidio: fix destructive port EM initialization for Tsi568 Replace possibly damaging broadcast writes into the per-port SP_MODE registers with individual writes for each port. This will preserve individual port configurations in case if ports are configured differently. Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/tsi568.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/rapidio/switches/tsi568.c b/drivers/rapidio/switches/tsi568.c index f7fd7898606e..b9a389b9f812 100644 --- a/drivers/rapidio/switches/tsi568.c +++ b/drivers/rapidio/switches/tsi568.c @@ -29,7 +29,7 @@ #define SPP_ROUTE_CFG_DESTID(n) (0x11070 + 0x100*n) #define SPP_ROUTE_CFG_PORT(n) (0x11074 + 0x100*n) -#define TSI568_SP_MODE_BC 0x10004 +#define TSI568_SP_MODE(n) (0x11004 + 0x100*n) #define TSI568_SP_MODE_PW_DIS 0x08000000 static int @@ -117,14 +117,19 @@ tsi568_em_init(struct rio_dev *rdev) u16 destid = rdev->rswitch->destid; u8 hopcount = rdev->rswitch->hopcount; u32 regval; + int portnum; pr_debug("TSI568 %s [%d:%d]\n", __func__, destid, hopcount); /* Make sure that Port-Writes are disabled (for all ports) */ - rio_mport_read_config_32(mport, destid, hopcount, - TSI568_SP_MODE_BC, ®val); - rio_mport_write_config_32(mport, destid, hopcount, - TSI568_SP_MODE_BC, regval | TSI568_SP_MODE_PW_DIS); + for (portnum = 0; + portnum < RIO_GET_TOTAL_PORTS(rdev->swpinfo); portnum++) { + rio_mport_read_config_32(mport, destid, hopcount, + TSI568_SP_MODE(portnum), ®val); + rio_mport_write_config_32(mport, destid, hopcount, + TSI568_SP_MODE(portnum), + regval | TSI568_SP_MODE_PW_DIS); + } return 0; } From 9bd38c2cdaba1f3ab2e6f90ac1b3d72481477092 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:34:37 -0700 Subject: [PATCH 118/139] namespaces: remove pid_ns and net_ns experimental status The pid namespace is in the kernel since 2.6.27 and the net_ns since 2.6.29. They are enabled in the distro by default and used by userspace component. They are mature enough to remove the 'experimental' label. Signed-off-by: Daniel Lezcano Cc: "Eric W. Biederman" Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 3ae8ffe738eb..c91ce6bcbe68 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -766,21 +766,18 @@ config USER_NS If unsure, say N. config PID_NS - bool "PID Namespaces (EXPERIMENTAL)" + bool "PID Namespaces" default n - depends on NAMESPACES && EXPERIMENTAL + depends on NAMESPACES help Support process id namespaces. This allows having multiple processes with the same pid as long as they are in different pid namespaces. This is a building block of containers. - Unless you want to work with an experimental feature - say N here. - config NET_NS bool "Network namespace" default n - depends on NAMESPACES && EXPERIMENTAL && NET + depends on NAMESPACES && NET help Allow user space to create what appear to be multiple instances of the network stack. From 17a6d4411a4dc7e436e1a71c24e10048452edd98 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:34:37 -0700 Subject: [PATCH 119/139] namespaces: default all the namespaces to 'yes' when CONFIG_NAMESPACES is selected As the different namespaces depend on 'CONFIG_NAMESPACES', it is logical to enable all the namespaces when we enable NAMESPACES. Signed-off-by: Daniel Lezcano Cc: "Eric W. Biederman" Cc: David Miller Acked-By: Matt Helsley Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index c91ce6bcbe68..239fae49750b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -746,6 +746,7 @@ config NAMESPACES config UTS_NS bool "UTS namespace" depends on NAMESPACES + default y help In this namespace tasks see different info provided with the uname() system call @@ -753,6 +754,7 @@ config UTS_NS config IPC_NS bool "IPC namespace" depends on NAMESPACES && (SYSVIPC || POSIX_MQUEUE) + default y help In this namespace tasks work with IPC ids which correspond to different IPC objects in different namespaces. @@ -760,6 +762,7 @@ config IPC_NS config USER_NS bool "User namespace (EXPERIMENTAL)" depends on NAMESPACES && EXPERIMENTAL + default y help This allows containers, i.e. vservers, to use user namespaces to provide different user info for different servers. @@ -767,8 +770,8 @@ config USER_NS config PID_NS bool "PID Namespaces" - default n depends on NAMESPACES + default y help Support process id namespaces. This allows having multiple processes with the same pid as long as they are in different @@ -776,8 +779,8 @@ config PID_NS config NET_NS bool "Network namespace" - default n depends on NAMESPACES && NET + default y help Allow user space to create what appear to be multiple instances of the network stack. From 8dd2a82c29111bb8ad2a2933aa269b49f72d356d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:34:38 -0700 Subject: [PATCH 120/139] namespaces Kconfig: make namespace a submenu Make the namespaces config option a submenu. Signed-off-by: Daniel Lezcano Cc: Li Zefan Cc: "Serge E. Hallyn" Cc: "Eric W. Biederman" Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 239fae49750b..5b3a3942a98c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -734,7 +734,7 @@ config RELAY If unsure, say N. -config NAMESPACES +menuconfig NAMESPACES bool "Namespaces support" if EMBEDDED default !EMBEDDED help @@ -743,9 +743,10 @@ config NAMESPACES or same user id or pid may refer to different tasks when used in different namespaces. +if NAMESPACES + config UTS_NS bool "UTS namespace" - depends on NAMESPACES default y help In this namespace tasks see different info provided with the @@ -753,7 +754,7 @@ config UTS_NS config IPC_NS bool "IPC namespace" - depends on NAMESPACES && (SYSVIPC || POSIX_MQUEUE) + depends on (SYSVIPC || POSIX_MQUEUE) default y help In this namespace tasks work with IPC ids which correspond to @@ -761,7 +762,7 @@ config IPC_NS config USER_NS bool "User namespace (EXPERIMENTAL)" - depends on NAMESPACES && EXPERIMENTAL + depends on EXPERIMENTAL default y help This allows containers, i.e. vservers, to use user namespaces @@ -770,7 +771,6 @@ config USER_NS config PID_NS bool "PID Namespaces" - depends on NAMESPACES default y help Support process id namespaces. This allows having multiple @@ -779,12 +779,14 @@ config PID_NS config NET_NS bool "Network namespace" - depends on NAMESPACES && NET + depends on NET default y help Allow user space to create what appear to be multiple instances of the network stack. +endif # NAMESPACES + config BLK_DEV_INITRD bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" depends on BROKEN || !FRV From 79ae9c29e4c80b781dd915d6f01be64e668be29c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:34:39 -0700 Subject: [PATCH 121/139] namespaces Kconfig: remove pointless cgroup dependency The different cgroup subsystems are under the cgroup submenu. The dependency between the cgroups and the menu subsystems is pointless. Signed-off-by: Daniel Lezcano Acked-by: Li Zefan Cc: "Serge E. Hallyn" Cc: "Eric W. Biederman" Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 5b3a3942a98c..fbc7c1f7af60 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -518,7 +518,6 @@ if CGROUPS config CGROUP_DEBUG bool "Example debug cgroup subsystem" - depends on CGROUPS default n help This option enables a simple cgroup subsystem that @@ -529,7 +528,6 @@ config CGROUP_DEBUG config CGROUP_NS bool "Namespace cgroup subsystem" - depends on CGROUPS help Provides a simple namespace cgroup subsystem to provide hierarchical naming of sets of namespaces, @@ -538,21 +536,19 @@ config CGROUP_NS config CGROUP_FREEZER bool "Freezer cgroup subsystem" - depends on CGROUPS help Provides a way to freeze and unfreeze all tasks in a cgroup. config CGROUP_DEVICE bool "Device controller for cgroups" - depends on CGROUPS && EXPERIMENTAL + depends on EXPERIMENTAL help Provides a cgroup implementing whitelists for devices which a process in the cgroup can mknod or open. config CPUSETS bool "Cpuset support" - depends on CGROUPS help This option will let you create and manage CPUSETs which allow dynamically partitioning a system into sets of CPUs and @@ -568,7 +564,6 @@ config PROC_PID_CPUSET config CGROUP_CPUACCT bool "Simple CPU accounting cgroup subsystem" - depends on CGROUPS help Provides a simple Resource Controller for monitoring the total CPU consumed by the tasks in a cgroup. @@ -578,11 +573,10 @@ config RESOURCE_COUNTERS help This option enables controller independent resource accounting infrastructure that works with cgroups. - depends on CGROUPS config CGROUP_MEM_RES_CTLR bool "Memory Resource Controller for Control Groups" - depends on CGROUPS && RESOURCE_COUNTERS + depends on RESOURCE_COUNTERS select MM_OWNER help Provides a memory resource controller that manages both anonymous @@ -623,7 +617,7 @@ config CGROUP_MEM_RES_CTLR_SWAP menuconfig CGROUP_SCHED bool "Group CPU scheduler" - depends on EXPERIMENTAL && CGROUPS + depends on EXPERIMENTAL default n help This feature lets CPU scheduler recognize task groups and control CPU @@ -652,7 +646,7 @@ endif #CGROUP_SCHED config BLK_CGROUP tristate "Block IO controller" - depends on CGROUPS && BLOCK + depends on BLOCK default n ---help--- Generic block IO controller cgroup interface. This is the common From eef691b36e9a6ee6e646c83af3f10bb60e926fa5 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:34:40 -0700 Subject: [PATCH 122/139] namespaces Kconfig: remove the cgroup device whitelist experimental tag This subsystem is merged since a long time now, I think we can consider it mature enough. Signed-off-by: Daniel Lezcano Cc: Li Zefan Cc: "Serge E. Hallyn" Cc: "Eric W. Biederman" Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index fbc7c1f7af60..813e07604f89 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -542,7 +542,6 @@ config CGROUP_FREEZER config CGROUP_DEVICE bool "Device controller for cgroups" - depends on EXPERIMENTAL help Provides a cgroup implementing whitelists for devices which a process in the cgroup can mknod or open. From 7af37bec41b19fcb9de2b42f355a39149641170e Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:34:41 -0700 Subject: [PATCH 123/139] namespaces Kconfig: move namespace menu location after the cgroup We have the namespaces as a menuconfig like the cgroup. The cgroup and the namespace are two base bricks for the containers. It is more logical to put the namespace menu right after the cgroup menu. Signed-off-by: Daniel Lezcano Cc: Li Zefan Cc: "Serge E. Hallyn" Cc: "Eric W. Biederman" Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 104 +++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 813e07604f89..88c10468db46 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -675,58 +675,6 @@ config DEBUG_BLK_CGROUP endif # CGROUPS -config MM_OWNER - bool - -config SYSFS_DEPRECATED - bool "enable deprecated sysfs features to support old userspace tools" - depends on SYSFS - default n - help - This option adds code that switches the layout of the "block" class - devices, to not show up in /sys/class/block/, but only in - /sys/block/. - - This switch is only active when the sysfs.deprecated=1 boot option is - passed or the SYSFS_DEPRECATED_V2 option is set. - - This option allows new kernels to run on old distributions and tools, - which might get confused by /sys/class/block/. Since 2007/2008 all - major distributions and tools handle this just fine. - - Recent distributions and userspace tools after 2009/2010 depend on - the existence of /sys/class/block/, and will not work with this - option enabled. - - Only if you are using a new kernel on an old distribution, you might - need to say Y here. - -config SYSFS_DEPRECATED_V2 - bool "enabled deprecated sysfs features by default" - default n - depends on SYSFS - depends on SYSFS_DEPRECATED - help - Enable deprecated sysfs by default. - - See the CONFIG_SYSFS_DEPRECATED option for more details about this - option. - - Only if you are using a new kernel on an old distribution, you might - need to say Y here. Even then, odds are you would not need it - enabled, you can always pass the boot option if absolutely necessary. - -config RELAY - bool "Kernel->user space relay support (formerly relayfs)" - help - This option enables support for relay interface support in - certain file systems (such as debugfs). - It is designed to provide an efficient mechanism for tools and - facilities to relay large amounts of data from kernel space to - user space. - - If unsure, say N. - menuconfig NAMESPACES bool "Namespaces support" if EMBEDDED default !EMBEDDED @@ -780,6 +728,58 @@ config NET_NS endif # NAMESPACES +config MM_OWNER + bool + +config SYSFS_DEPRECATED + bool "enable deprecated sysfs features to support old userspace tools" + depends on SYSFS + default n + help + This option adds code that switches the layout of the "block" class + devices, to not show up in /sys/class/block/, but only in + /sys/block/. + + This switch is only active when the sysfs.deprecated=1 boot option is + passed or the SYSFS_DEPRECATED_V2 option is set. + + This option allows new kernels to run on old distributions and tools, + which might get confused by /sys/class/block/. Since 2007/2008 all + major distributions and tools handle this just fine. + + Recent distributions and userspace tools after 2009/2010 depend on + the existence of /sys/class/block/, and will not work with this + option enabled. + + Only if you are using a new kernel on an old distribution, you might + need to say Y here. + +config SYSFS_DEPRECATED_V2 + bool "enabled deprecated sysfs features by default" + default n + depends on SYSFS + depends on SYSFS_DEPRECATED + help + Enable deprecated sysfs by default. + + See the CONFIG_SYSFS_DEPRECATED option for more details about this + option. + + Only if you are using a new kernel on an old distribution, you might + need to say Y here. Even then, odds are you would not need it + enabled, you can always pass the boot option if absolutely necessary. + +config RELAY + bool "Kernel->user space relay support (formerly relayfs)" + help + This option enables support for relay interface support in + certain file systems (such as debugfs). + It is designed to provide an efficient mechanism for tools and + facilities to relay large amounts of data from kernel space to + user space. + + If unsure, say N. + config BLK_DEV_INITRD bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" depends on BROKEN || !FRV From db9e5679d6aecb17253f41bd06d98194800f9c01 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 27 Oct 2010 15:34:42 -0700 Subject: [PATCH 124/139] delay-accounting: reimplement -c for getdelays.c to report information on a target command Task delay-accounting was identified as one means of determining how long a process spends in congestion_wait() without adding new statistics. For example, if the workload should not be doing IO, delay-accounting could reveal how long it was spending in unexpected IO or delays. Unfortunately, on closer examination it was clear that getdelays does not act as documented. Commit a3baf649 ("per-task-delay-accounting: documentation") added Documentation/accounting/getdelays.c with a -c switch that was documented to fork/exec a child and report statistics on it but for reasons that are unclear to me, commit 9e06d3f9 deleted support for this switch but did not update the documentation. It might be an oversight or it might be because the control flow of the program meant that accounting information would be printed once early in the lifetime of the program making it of limited use. This patch reimplements -c for getdelays.c to act as documented. Unlike the original version, it waits until the command completes before printing any information on it. An example of it being used looks like $ ./getdelays -d -c find /home/mel -name mel print delayacct stats ON /home/mel /home/mel/.notes-wine/drive_c/windows/profiles/mel /home/mel/.wine/drive_c/windows/profiles/mel /home/mel/git-configs/dot.kde/share/apps/konqueror/home/mel PID 5923 CPU count real total virtual total delay total 42779 5051232096 5164722692 564207988 IO count delay total 41727 97804147758 SWAP count delay total 0 0 RECLAIM count delay total 0 0 [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mel Gorman Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/getdelays.c | 38 ++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index 6e25c2659e0a..a2976a6de033 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -266,11 +267,13 @@ int main(int argc, char *argv[]) int containerset = 0; char containerpath[1024]; int cfd = 0; + int forking = 0; + sigset_t sigset; struct msgtemplate msg; - while (1) { - c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:"); + while (!forking) { + c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:"); if (c < 0) break; @@ -319,6 +322,28 @@ int main(int argc, char *argv[]) err(1, "Invalid pid\n"); cmd_type = TASKSTATS_CMD_ATTR_PID; break; + case 'c': + + /* Block SIGCHLD for sigwait() later */ + if (sigemptyset(&sigset) == -1) + err(1, "Failed to empty sigset"); + if (sigaddset(&sigset, SIGCHLD)) + err(1, "Failed to set sigchld in sigset"); + sigprocmask(SIG_BLOCK, &sigset, NULL); + + /* fork/exec a child */ + tid = fork(); + if (tid < 0) + err(1, "Fork failed\n"); + if (tid == 0) + if (execvp(argv[optind - 1], + &argv[optind - 1]) < 0) + exit(-1); + + /* Set the command type and avoid further processing */ + cmd_type = TASKSTATS_CMD_ATTR_PID; + forking = 1; + break; case 'v': printf("debug on\n"); dbg = 1; @@ -370,6 +395,15 @@ int main(int argc, char *argv[]) goto err; } + /* + * If we forked a child, wait for it to exit. Cannot use waitpid() + * as all the delicious data would be reaped as part of the wait + */ + if (tid && forking) { + int sig_received; + sigwait(&sigset, &sig_received); + } + if (tid) { rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, cmd_type, &tid, sizeof(__u32)); From 85893120699f8bae8caa12a8ee18ab5fceac978e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 27 Oct 2010 15:34:43 -0700 Subject: [PATCH 125/139] delayacct: align to 8 byte boundary on 64-bit systems prepare_reply() sets up an skb for the response. The payload contains: +--------------------------------+ | genlmsghdr - 4 bytes | +--------------------------------+ | NLA header - 4 bytes | /* Aggregate header */ +-+------------------------------+ | | NLA header - 4 bytes | /* PID header */ | +------------------------------+ | | pid/tgid - 4 bytes | | +------------------------------+ | | NLA header - 4 bytes | /* stats header */ | + -----------------------------+ <- oops. aligned on 4 byte boundary | | struct taskstats - 328 bytes | +-+------------------------------+ The start of the taskstats struct must be 8 byte aligned on IA64 (and other systems with 8 byte alignment rules for 64-bit types) or runtime alignment warnings will be issued. This patch pads the pid/tgid field out to sizeof(long), which forces the alignment of taskstats. The getdelays userspace code is ok with this since it assumes 32-bit pid/tgid and then honors that header's length field. An array is used to avoid exposing kernel memory contents to userspace in the response. Signed-off-by: Jeff Mahoney Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 11281d5792bd..5a651aa63d61 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -360,6 +360,12 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) struct nlattr *na, *ret; int aggr; + /* If we don't pad, we end up with alignment on a 4 byte boundary. + * This causes lots of runtime warnings on systems requiring 8 byte + * alignment */ + u32 pids[2] = { pid, 0 }; + int pid_size = ALIGN(sizeof(pid), sizeof(long)); + aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; @@ -367,7 +373,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) na = nla_nest_start(skb, aggr); if (!na) goto err; - if (nla_put(skb, type, sizeof(pid), &pid) < 0) + if (nla_put(skb, type, pid_size, pids) < 0) goto err; ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); if (!ret) From 9323312592cca636d7c2580dc85fa4846efa86a2 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:44 -0700 Subject: [PATCH 126/139] taskstats: separate taskstats commands Move each taskstats command into a single function. This makes the code more readable and makes it easier to add new commands. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 118 ++++++++++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 40 deletions(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 5a651aa63d61..9970cae04f15 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -430,39 +430,46 @@ err: return rc; } -static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) +static int cmd_attr_register_cpumask(struct genl_info *info) { - int rc; - struct sk_buff *rep_skb; - struct taskstats *stats; - size_t size; cpumask_var_t mask; + int rc; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) - goto free_return_rc; - if (rc == 0) { - rc = add_del_listener(info->snd_pid, mask, REGISTER); - goto free_return_rc; - } + goto out; + rc = add_del_listener(info->snd_pid, mask, REGISTER); +out: + free_cpumask_var(mask); + return rc; +} +static int cmd_attr_deregister_cpumask(struct genl_info *info) +{ + cpumask_var_t mask; + int rc; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) - goto free_return_rc; - if (rc == 0) { - rc = add_del_listener(info->snd_pid, mask, DEREGISTER); -free_return_rc: - free_cpumask_var(mask); - return rc; - } + goto out; + rc = add_del_listener(info->snd_pid, mask, DEREGISTER); +out: free_cpumask_var(mask); + return rc; +} + +static int cmd_attr_pid(struct genl_info *info) +{ + struct taskstats *stats; + struct sk_buff *rep_skb; + size_t size; + u32 pid; + int rc; - /* - * Size includes space for nested attributes - */ size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); @@ -471,33 +478,64 @@ free_return_rc: return rc; rc = -EINVAL; - if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { - u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); - stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); - if (!stats) - goto err; - - rc = fill_pid(pid, NULL, stats); - if (rc < 0) - goto err; - } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { - u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); - stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); - if (!stats) - goto err; - - rc = fill_tgid(tgid, NULL, stats); - if (rc < 0) - goto err; - } else + pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); + if (!stats) goto err; + rc = fill_pid(pid, NULL, stats); + if (rc < 0) + goto err; return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; } +static int cmd_attr_tgid(struct genl_info *info) +{ + struct taskstats *stats; + struct sk_buff *rep_skb; + size_t size; + u32 tgid; + int rc; + + size = nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + + rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); + if (rc < 0) + return rc; + + rc = -EINVAL; + tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); + if (!stats) + goto err; + + rc = fill_tgid(tgid, NULL, stats); + if (rc < 0) + goto err; + return send_reply(rep_skb, info); +err: + nlmsg_free(rep_skb); + return rc; +} + +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) +{ + if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) + return cmd_attr_register_cpumask(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) + return cmd_attr_deregister_cpumask(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) + return cmd_attr_pid(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) + return cmd_attr_tgid(info); + else + return -EINVAL; +} + static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; From 3d9e0cf1fe007b88db55d43dfdb6839e1a029ca5 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:44 -0700 Subject: [PATCH 127/139] taskstats: split fill_pid function Separate the finding of a task_struct by pid or tgid from filling the taskstats data. This makes the code more readable. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 52 ++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 9970cae04f15..c8231fb15708 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -175,22 +175,8 @@ static void send_cpu_listeners(struct sk_buff *skb, up_write(&listeners->sem); } -static int fill_pid(pid_t pid, struct task_struct *tsk, - struct taskstats *stats) +static void fill_stats(struct task_struct *tsk, struct taskstats *stats) { - int rc = 0; - - if (!tsk) { - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); - if (!tsk) - return -ESRCH; - } else - get_task_struct(tsk); - memset(stats, 0, sizeof(*stats)); /* * Each accounting subsystem adds calls to its functions to @@ -209,17 +195,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); - - /* Define err: label here if needed */ - put_task_struct(tsk); - return rc; - } -static int fill_tgid(pid_t tgid, struct task_struct *first, - struct taskstats *stats) +static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) { struct task_struct *tsk; + + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return -ESRCH; + fill_stats(tsk, stats); + put_task_struct(tsk); + return 0; +} + +static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) +{ + struct task_struct *tsk, *first; unsigned long flags; int rc = -ESRCH; @@ -228,8 +224,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, * leaders who are already counted with the dead tasks */ rcu_read_lock(); - if (!first) - first = find_task_by_vpid(tgid); + first = find_task_by_vpid(tgid); if (!first || !lock_task_sighand(first, &flags)) goto out; @@ -268,7 +263,6 @@ out: return rc; } - static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; @@ -483,7 +477,7 @@ static int cmd_attr_pid(struct genl_info *info) if (!stats) goto err; - rc = fill_pid(pid, NULL, stats); + rc = fill_stats_for_pid(pid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); @@ -513,7 +507,7 @@ static int cmd_attr_tgid(struct genl_info *info) if (!stats) goto err; - rc = fill_tgid(tgid, NULL, stats); + rc = fill_stats_for_tgid(tgid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); @@ -599,9 +593,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (!stats) goto err; - rc = fill_pid(-1, tsk, stats); - if (rc < 0) - goto err; + fill_stats(tsk, stats); /* * Doesn't matter if tsk is the leader or the last group member leaving From d57af9b2142f31a39dcfdeb30776baadfc802827 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:45 -0700 Subject: [PATCH 128/139] taskstats: use real microsecond granularity for CPU times The taskstats interface uses microsecond granularity for the user and system time values. The conversion from cputime to the taskstats values uses the cputime_to_msecs primitive which effectively limits the granularity to milliseconds. Add the cputime_to_usecs primitive for architectures that have better, more precise CPU time values. Remove cputime_to_msecs primitive because there are no more users left. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Cc: Luck Tony Cc: Shailabh Nagar Cc: Martin Schwidefsky Cc: Oleg Nesterov Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Shailabh Nagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/cputime.h | 6 +++--- arch/powerpc/include/asm/cputime.h | 12 ++++++------ arch/s390/include/asm/cputime.h | 10 +++++----- include/asm-generic/cputime.h | 6 +++--- kernel/tsacct.c | 10 ++++------ 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 7fa8a8594660..6073b187528a 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -56,10 +56,10 @@ typedef u64 cputime64_t; #define jiffies64_to_cputime64(__jif) ((__jif) * (NSEC_PER_SEC / HZ)) /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ -#define cputime_to_msecs(__ct) ((__ct) / NSEC_PER_MSEC) -#define msecs_to_cputime(__msecs) ((__msecs) * NSEC_PER_MSEC) +#define cputime_to_usecs(__ct) ((__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) ((__usecs) * NSEC_PER_USEC) /* * Convert cputime <-> seconds diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 8bdc6a9e5773..1cf20bdfbeca 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -124,23 +124,23 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct) } /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ extern u64 __cputime_msec_factor; -static inline unsigned long cputime_to_msecs(const cputime_t ct) +static inline unsigned long cputime_to_usecs(const cputime_t ct) { - return mulhdu(ct, __cputime_msec_factor); + return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC; } -static inline cputime_t msecs_to_cputime(const unsigned long ms) +static inline cputime_t usecs_to_cputime(const unsigned long us) { cputime_t ct; unsigned long sec; /* have to be a little careful about overflow */ - ct = ms % 1000; - sec = ms / 1000; + ct = us % 1000000; + sec = us / 1000000; if (ct) { ct *= tb_ticks_per_sec; do_div(ct, 1000); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 8b1a52a137c5..40e2ab0fa3f0 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -73,18 +73,18 @@ cputime64_to_jiffies64(cputime64_t cputime) } /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. */ static inline unsigned int -cputime_to_msecs(const cputime_t cputime) +cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096000); + return cputime_div(cputime, 4096); } static inline cputime_t -msecs_to_cputime(const unsigned int m) +usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096000; + return (cputime_t) m * 4096; } /* diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index ca0f239f0e13..2bcc5c7c22a6 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -33,10 +33,10 @@ typedef u64 cputime64_t; /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. */ -#define cputime_to_msecs(__ct) jiffies_to_msecs(__ct) -#define msecs_to_cputime(__msecs) msecs_to_jiffies(__msecs) +#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct); +#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs); /* * Convert cputime to seconds and back. diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 0a67e041edf8..24dc60d9fa1f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_ppid = pid_alive(tsk) ? rcu_dereference(tsk->real_parent)->tgid : 0; rcu_read_unlock(); - stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; - stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; - stats->ac_utimescaled = - cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; - stats->ac_stimescaled = - cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC; + stats->ac_utime = cputime_to_usecs(tsk->utime); + stats->ac_stime = cputime_to_usecs(tsk->stime); + stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); + stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; From 0be8557bcd34887d5a42c01c5659cab5ecf99f13 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 27 Oct 2010 15:34:46 -0700 Subject: [PATCH 129/139] fuse: use release_pages() Replace iterated page_cache_release() with release_pages(), which is faster and shorter. Needs release_pages() to be exported to modules. Suggested-by: Andrew Morton Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 7 +------ mm/swap.c | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b98664275f02..6e07696308dc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1334,12 +1334,7 @@ out_finish: static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) { - int i; - - for (i = 0; i < req->num_pages; i++) { - struct page *page = req->pages[i]; - page_cache_release(page); - } + release_pages(req->pages, req->num_pages, 0); } static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, diff --git a/mm/swap.c b/mm/swap.c index 3ce7bc373a52..3f4854205b16 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -378,6 +378,7 @@ void release_pages(struct page **pages, int nr, int cold) pagevec_free(&pages_to_free); } +EXPORT_SYMBOL(release_pages); /* * The pages which we're about to release may be in the deferred lru-addition From 4ad9b208cf1d03c8c3c1e0063ee6eea88833118f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 27 Oct 2010 15:34:47 -0700 Subject: [PATCH 130/139] sparc: remove dma64_addr_t usage dma64_addr_t looks pointless (at least there is no point that an architecture has the own dma64_addr_t typedef). dma_addr_t is set to 32 or 64 bits appropriately. You can use u64 at places where you know that 64 bit address is always necessary. Let's use u64 instead for sparc32. Looks like PCI654_REQUIRED_MASK or PCI64_ADR_BASE isn't used. They can be removed? Signed-off-by: FUJITA Tomonori Acked-by: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pci_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index 5312782f0b5e..948b686ec089 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -38,7 +38,7 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) * types on sparc64. However, it requires that the device * can drive enough of the 64 bits. */ -#define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) +#define PCI64_REQUIRED_MASK (~(u64)0) #define PCI64_ADDR_BASE 0xfffc000000000000UL #ifdef CONFIG_PCI From c1c7438dbbd03442ff86066843485db7fadd67bd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 27 Oct 2010 15:34:47 -0700 Subject: [PATCH 131/139] mips: remove dma64_addr_t usage dma64_addr_t looks pointless (at least there is no point that an architecture has the own dma64_addr_t typedef). dma_addr_t is set to 32 or 64 bits appropriately. You can use u64 at places where you know that 64 bit address is always necessary. Let's use u64 instead for mips. Signed-off-by: FUJITA Tomonori Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pci/bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h index 5f4b9d4e4114..f1f508e4f971 100644 --- a/arch/mips/include/asm/pci/bridge.h +++ b/arch/mips/include/asm/pci/bridge.h @@ -839,7 +839,7 @@ struct bridge_controller { nasid_t nasid; unsigned int widget_id; unsigned int irq_cpu; - dma64_addr_t baddr; + u64 baddr; unsigned int pci_int[8]; }; From f49d2eb314f3f83270bef6aea2abb7a26d4218ee Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 27 Oct 2010 15:34:48 -0700 Subject: [PATCH 132/139] alpha: remove dma64_addr_t usage dma_addr_t is always 64 bit on alpha. So let's use dma_addr_t instead. Signed-off-by: FUJITA Tomonori Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/pci_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index d1dbd9acd1df..022c2748fa41 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -223,7 +223,7 @@ iommu_arena_free(struct pci_iommu_arena *arena, long ofs, long n) */ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask) { - dma64_addr_t dac_offset = alpha_mv.pci_dac_offset; + dma_addr_t dac_offset = alpha_mv.pci_dac_offset; int ok = 1; /* If this is not set, the machine doesn't support DAC at all. */ @@ -756,7 +756,7 @@ static void alpha_pci_unmap_sg(struct device *dev, struct scatterlist *sg, spin_lock_irqsave(&arena->lock, flags); for (end = sg + nents; sg < end; ++sg) { - dma64_addr_t addr; + dma_addr_t addr; size_t size; long npages, ofs; dma_addr_t tend; From 12aa4c64174cb0d915cd1c7b763847c0ffa8e92c Mon Sep 17 00:00:00 2001 From: Brian Swetland Date: Wed, 27 Oct 2010 15:34:49 -0700 Subject: [PATCH 133/139] w1: don't allow arbitrary users to remove w1 devices The search/pullup/add/remove device attributes were 0666 which would allow arbitrary users to affect the 1 wire bus. Change to 0664 to prevent that. I found this patch in the Android tree, apparently this has never been sent upstream so doing it now. Signed-off-by: Brian Swetland Signed-off-by: Linus Walleij Cc: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/w1.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 2839e281cd65..b7b5014ff714 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -517,10 +517,10 @@ static W1_MASTER_ATTR_RO(max_slave_count, S_IRUGO); static W1_MASTER_ATTR_RO(attempts, S_IRUGO); static W1_MASTER_ATTR_RO(timeout, S_IRUGO); static W1_MASTER_ATTR_RO(pointer, S_IRUGO); -static W1_MASTER_ATTR_RW(search, S_IRUGO | S_IWUGO); -static W1_MASTER_ATTR_RW(pullup, S_IRUGO | S_IWUGO); -static W1_MASTER_ATTR_RW(add, S_IRUGO | S_IWUGO); -static W1_MASTER_ATTR_RW(remove, S_IRUGO | S_IWUGO); +static W1_MASTER_ATTR_RW(search, S_IRUGO | S_IWUSR | S_IWGRP); +static W1_MASTER_ATTR_RW(pullup, S_IRUGO | S_IWUSR | S_IWGRP); +static W1_MASTER_ATTR_RW(add, S_IRUGO | S_IWUSR | S_IWGRP); +static W1_MASTER_ATTR_RW(remove, S_IRUGO | S_IWUSR | S_IWGRP); static struct attribute *w1_master_default_attrs[] = { &w1_master_attribute_name.attr, From 144ecf310eb52d9df607b9b7eeb096743e232a96 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Wed, 27 Oct 2010 15:34:50 -0700 Subject: [PATCH 134/139] kfifo: fix kfifo_alloc() to return a signed int value Add a new __kfifo_int_must_check_helper() helper function, which is needed for kfifo_alloc() to return the right signed integer value. The origin __kfifo_must_check_helper() helper was renamed into __kfifo_uint_must_check_helper() to show the sign which is expected and returned. (And revert the temporary disabling of __kfifo_must_check_helper()) Signed-off-by: Stefani Seibold Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c238ad2f82ea..10308c6a3d1c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -171,8 +171,17 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void); } -/* __kfifo_must_check_helper() is temporarily disabled because it was faulty */ -#define __kfifo_must_check_helper(x) (x) +static inline unsigned int __must_check +__kfifo_uint_must_check_helper(unsigned int val) +{ + return val; +} + +static inline int __must_check +__kfifo_int_must_check_helper(int val) +{ + return val; +} /** * kfifo_initialized - Check if the fifo is initialized @@ -264,7 +273,7 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void); * @fifo: address of the fifo to be used */ #define kfifo_avail(fifo) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ @@ -297,7 +306,7 @@ __kfifo_must_check_helper( \ * This function returns the size of the next fifo record in number of bytes. */ #define kfifo_peek_len(fifo) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ @@ -320,7 +329,7 @@ __kfifo_must_check_helper( \ * Return 0 if no error, otherwise an error code. */ #define kfifo_alloc(fifo, size, gfp_mask) \ -__kfifo_must_check_helper( \ +__kfifo_int_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -416,7 +425,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_get(fifo, val) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((val) + 1) __val = (val); \ @@ -457,7 +466,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_peek(fifo, val) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((val) + 1) __val = (val); \ @@ -549,7 +558,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_out(fifo, buf, n) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((buf) + 1) __buf = (buf); \ @@ -577,7 +586,7 @@ __kfifo_must_check_helper( \ * copied. */ #define kfifo_out_spinlocked(fifo, buf, n, lock) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ unsigned long __flags; \ unsigned int __ret; \ @@ -606,7 +615,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_from_user(fifo, from, len, copied) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ @@ -634,7 +643,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_to_user(fifo, to, len, copied) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ @@ -761,7 +770,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_out_peek(fifo, buf, n) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((buf) + 1) __buf = (buf); \ From 5de1cb2d0f1c1e5475d2bedf65b76828f8cdde22 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 27 Oct 2010 15:34:52 -0700 Subject: [PATCH 135/139] kernel/resource.c: handle reinsertion of an already-inserted resource If the same resource is inserted to the resource tree (maybe not on purpose), a dead loop will be created. In this situation, The kernel does not report any warning or error :( The command below will show a endless print. #cat /proc/iomem [akpm@linux-foundation.org: add WARN_ON()] Signed-off-by: Huang Shijie Cc: Jesse Barnes Cc: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/resource.c b/kernel/resource.c index 7b36976e5dea..9c9841cb6902 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -453,6 +453,8 @@ static struct resource * __insert_resource(struct resource *parent, struct resou if (first == parent) return first; + if (WARN_ON(first == new)) /* duplicated insertion */ + return first; if ((first->start > new->start) || (first->end < new->end)) break; From c3b92ce9e75f6353104fc7f8e32fb9fdb2550ad0 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Wed, 27 Oct 2010 15:34:52 -0700 Subject: [PATCH 136/139] ramoops: use the platform data structure instead of module params As each board and system has different memory for ramoops. It's better to define the platform data instead of module params. [akpm@linux-foundation.org: fix ramoops_remove() return type] Signed-off-by: Kyungmin Park Cc: Marco Stornelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ramoops.c | 30 ++++++++++++++++++++++++++++-- include/linux/ramoops.h | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 include/linux/ramoops.h diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index 74f00b5ffa36..73dcb0ee41fd 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #define RAMOOPS_KERNMSG_HDR "====" #define RAMOOPS_HEADER_SIZE (5 + sizeof(struct timeval)) @@ -91,11 +93,17 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper, cxt->count = (cxt->count + 1) % cxt->max_count; } -static int __init ramoops_init(void) +static int __init ramoops_probe(struct platform_device *pdev) { + struct ramoops_platform_data *pdata = pdev->dev.platform_data; struct ramoops_context *cxt = &oops_cxt; int err = -EINVAL; + if (pdata) { + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + } + if (!mem_size) { printk(KERN_ERR "ramoops: invalid size specification"); goto fail3; @@ -142,7 +150,7 @@ fail3: return err; } -static void __exit ramoops_exit(void) +static int __exit ramoops_remove(struct platform_device *pdev) { struct ramoops_context *cxt = &oops_cxt; @@ -151,8 +159,26 @@ static void __exit ramoops_exit(void) iounmap(cxt->virt_addr); release_mem_region(cxt->phys_addr, cxt->size); + return 0; } +static struct platform_driver ramoops_driver = { + .remove = __exit_p(ramoops_remove), + .driver = { + .name = "ramoops", + .owner = THIS_MODULE, + }, +}; + +static int __init ramoops_init(void) +{ + return platform_driver_probe(&ramoops_driver, ramoops_probe); +} + +static void __exit ramoops_exit(void) +{ + platform_driver_unregister(&ramoops_driver); +} module_init(ramoops_init); module_exit(ramoops_exit); diff --git a/include/linux/ramoops.h b/include/linux/ramoops.h new file mode 100644 index 000000000000..0ae68a2c1212 --- /dev/null +++ b/include/linux/ramoops.h @@ -0,0 +1,15 @@ +#ifndef __RAMOOPS_H +#define __RAMOOPS_H + +/* + * Ramoops platform data + * @mem_size memory size for ramoops + * @mem_address physical memory address to contain ramoops + */ + +struct ramoops_platform_data { + unsigned long mem_size; + unsigned long mem_address; +}; + +#endif From 61d8e11e519ee7912ab59610fba1aaf08e3c1d84 Mon Sep 17 00:00:00 2001 From: Zimny Lech Date: Wed, 27 Oct 2010 15:34:53 -0700 Subject: [PATCH 137/139] Remove duplicate includes from many files Signed-off-by: Zimny Lech Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-tegra/timer.c | 1 - arch/arm/plat-nomadik/include/plat/ste_dma40.h | 1 - arch/tile/kernel/setup.c | 2 -- arch/x86/mm/init_64.c | 1 - arch/x86/xen/enlighten.c | 1 - drivers/media/IR/lirc_dev.c | 1 - drivers/platform/x86/intel_pmic_gpio.c | 1 - include/linux/virtio_9p.h | 1 - kernel/trace/trace_kprobe.c | 1 - 9 files changed, 10 deletions(-) diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index 2f420210d406..9057d6fd1d31 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -27,7 +27,6 @@ #include #include -#include #include #include diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 5fbde4b8dc12..93a812672d9a 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -14,7 +14,6 @@ #include #include #include -#include /* dev types for memcpy */ #define STEDMA40_DEV_DST_MEMORY (-1) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f3a50e74f9a4..ae51cad12da0 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 84346200e783..71a59296af80 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,7 +51,6 @@ #include #include #include -#include static int __init parse_direct_gbpages_off(char *arg) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 44ab12dc2a12..0cd12db0b142 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/IR/lirc_dev.c b/drivers/media/IR/lirc_dev.c index 0acf6396e068..202581808bdc 100644 --- a/drivers/media/IR/lirc_dev.c +++ b/drivers/media/IR/lirc_dev.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c index f540ff96c53f..e61db9dfebef 100644 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ b/drivers/platform/x86/intel_pmic_gpio.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 1faa80d92f05..e68b439b2860 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -5,7 +5,6 @@ #include #include #include -#include /* The feature bitmap for virtio 9P */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8d2852baa4a..2dec9bcde8b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include "trace.h" From 231f3d393f63f6e3b505afa179999bba491d0f08 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Oct 2010 15:34:53 -0700 Subject: [PATCH 138/139] select: rename estimate_accuracy() to select_estimate_accuracy() Make it a subsystem-specific identifier because we wish to amke it non-static in the next patch ("epoll: make epoll_wait() use the hrtimer range feature"). Cc: Shawn Bohrer Cc: Al Viro Cc: Davide Libenzi Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/select.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/select.c b/fs/select.c index 500a669f7790..5f023f911202 100644 --- a/fs/select.c +++ b/fs/select.c @@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv) return slack; } -static long estimate_accuracy(struct timespec *tv) +static long select_estimate_accuracy(struct timespec *tv) { unsigned long ret; struct timespec now; @@ -417,7 +417,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) } if (end_time && !timed_out) - slack = estimate_accuracy(end_time); + slack = select_estimate_accuracy(end_time); retval = 0; for (;;) { @@ -769,7 +769,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, } if (end_time && !timed_out) - slack = estimate_accuracy(end_time); + slack = select_estimate_accuracy(end_time); for (;;) { struct poll_list *walk; From 95aac7b1cd224f568fb83937044cd303ff11b029 Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Wed, 27 Oct 2010 15:34:54 -0700 Subject: [PATCH 139/139] epoll: make epoll_wait() use the hrtimer range feature This make epoll use hrtimers for the timeout value which prevents epoll_wait() from timing out up to a millisecond early. This mirrors the behavior of select() and poll(). Signed-off-by: Shawn Bohrer Cc: Al Viro Acked-by: Davide Libenzi Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 33 ++++++++++++++++++--------------- fs/select.c | 2 +- include/linux/poll.h | 2 ++ 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 256bb7bb102a..8cf07242067d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -77,9 +77,6 @@ /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 -/* Maximum msec timeout value storeable in a long int */ -#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) - #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_UNACTIVE_PTR ((void *) -1L) @@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep, static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res, eavail; + int res, eavail, timed_out = 0; unsigned long flags; - long jtimeout; + long slack; wait_queue_t wait; + struct timespec end_time; + ktime_t expires, *to = NULL; - /* - * Calculate the timeout by checking for the "infinite" value (-1) - * and the overflow condition. The passed timeout is in milliseconds, - * that why (t * HZ) / 1000. - */ - jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? - MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; + if (timeout > 0) { + ktime_get_ts(&end_time); + timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); + slack = select_estimate_accuracy(&end_time); + to = &expires; + *to = timespec_to_ktime(end_time); + } else if (timeout == 0) { + timed_out = 1; + } retry: spin_lock_irqsave(&ep->lock, flags); @@ -1150,7 +1151,7 @@ retry: * to TASK_INTERRUPTIBLE before doing the checks. */ set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&ep->rdllist) || !jtimeout) + if (!list_empty(&ep->rdllist) || timed_out) break; if (signal_pending(current)) { res = -EINTR; @@ -1158,7 +1159,9 @@ retry: } spin_unlock_irqrestore(&ep->lock, flags); - jtimeout = schedule_timeout(jtimeout); + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + timed_out = 1; + spin_lock_irqsave(&ep->lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1176,7 +1179,7 @@ retry: * more luck. */ if (!res && eavail && - !(res = ep_send_events(ep, events, maxevents)) && jtimeout) + !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto retry; return res; diff --git a/fs/select.c b/fs/select.c index 5f023f911202..b7b10aa30861 100644 --- a/fs/select.c +++ b/fs/select.c @@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv) return slack; } -static long select_estimate_accuracy(struct timespec *tv) +long select_estimate_accuracy(struct timespec *tv) { unsigned long ret; struct timespec now; diff --git a/include/linux/poll.h b/include/linux/poll.h index 600cc1fde64d..56e76af78102 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -73,6 +73,8 @@ extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); +extern long select_estimate_accuracy(struct timespec *tv); + static inline int poll_schedule(struct poll_wqueues *pwq, int state) {