From cf450136bfde77c7f95065c91bffded4aa7fa731 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 31 Jul 2011 13:23:49 -0400 Subject: [PATCH 01/47] ACPI: ignore FADT reset-reg-sup flag we check that the address is non-zero later anyway. https://bugzilla.kernel.org/show_bug.cgi?id=11533 Signed-off-by: Len Brown --- drivers/acpi/acpica/hwxface.c | 3 +-- drivers/acpi/reboot.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c index 9d38eb6c0d0b..fe1fb6366aa8 100644 --- a/drivers/acpi/acpica/hwxface.c +++ b/drivers/acpi/acpica/hwxface.c @@ -74,8 +74,7 @@ acpi_status acpi_reset(void) /* Check if the reset register is supported */ - if (!(acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) || - !reset_reg->address) { + if (!reset_reg->address) { return_ACPI_STATUS(AE_NOT_EXIST); } diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c index a6c77e8b37bd..c1d612435939 100644 --- a/drivers/acpi/reboot.c +++ b/drivers/acpi/reboot.c @@ -23,8 +23,7 @@ void acpi_reboot(void) /* Is the reset register supported? The spec says we should be * checking the bit width and bit offset, but Windows ignores * these fields */ - if (!(acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER)) - return; + /* Ignore also acpi_gbl_FADT.flags.ACPI_FADT_RESET_REGISTER */ reset_value = acpi_gbl_FADT.reset_value; From 3e80acd1af40fcd91a200b0416a7616b20c5d647 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 23 Feb 2012 22:40:43 +0200 Subject: [PATCH 02/47] ACPICA: Fix regression in FADT revision checks commit 64b3db22c04586997ab4be46dd5a5b99f8a2d390 (2.6.39), "Remove use of unreliable FADT revision field" causes regression for old P4 systems because now cst_control and other fields are not reset to 0. The effect is that acpi_processor_power_init will notice cst_control != 0 and a write to CST_CNT register is performed that should not happen. 
As a result, the system oopses after the "No _CST, giving up" message, sometimes in acpi_ns_internalize_name, sometimes in acpi_ns_get_type, usually at random places. Maybe during migration to CPU 1 in acpi_processor_get_throttling. Every one of these settings helps to avoid this problem: - acpi=off - processor.nocst=1 - maxcpus=1 The fix is to update acpi_gbl_FADT.header.length after the original value is used to check for old revisions. https://bugzilla.kernel.org/show_bug.cgi?id=42700 https://bugzilla.redhat.com/show_bug.cgi?id=727865 Signed-off-by: Julian Anastasov Acked-by: Bob Moore Signed-off-by: Len Brown --- drivers/acpi/acpica/tbfadt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index c5d870406f41..4c9c760db4a4 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -363,10 +363,6 @@ static void acpi_tb_convert_fadt(void) u32 address32; u32 i; - /* Update the local FADT table header length */ - - acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); - /* * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary. * Later code will always use the X 64-bit field. Also, check for an @@ -408,6 +404,10 @@ static void acpi_tb_convert_fadt(void) acpi_gbl_FADT.boot_flags = 0; } + /* Update the local FADT table header length */ + + acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); + /* * Expand the ACPI 1.0 32-bit addresses to the ACPI 2.0 64-bit "X" * generic address structures as necessary. Later code will always use From e1689795a784a7c41ac4cf9032794986b095a133 Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:42 -0500 Subject: [PATCH 03/47] cpuidle: Add common time keeping and irq enabling Make necessary changes to implement time keeping and irq enabling in the core cpuidle code. 
This will allow the removal of these functionalities from various platform cpuidle implementations whose timekeeping and irq enabling follows the form in this common code. Signed-off-by: Robert Lee Tested-by: Jean Pihet Tested-by: Amit Daniel Tested-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Reviewed-by: Deepthi Dharwar Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/arm/include/asm/cpuidle.h | 29 +++++++++++++++ arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/cpuidle.c | 21 +++++++++++ drivers/cpuidle/cpuidle.c | 66 ++++++++++++++++++++++++++++++---- include/linux/cpuidle.h | 13 ++++++- 5 files changed, 122 insertions(+), 9 deletions(-) create mode 100644 arch/arm/include/asm/cpuidle.h create mode 100644 arch/arm/kernel/cpuidle.c diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h new file mode 100644 index 000000000000..2fca60ab513a --- /dev/null +++ b/arch/arm/include/asm/cpuidle.h @@ -0,0 +1,29 @@ +#ifndef __ASM_ARM_CPUIDLE_H +#define __ASM_ARM_CPUIDLE_H + +#ifdef CONFIG_CPU_IDLE +extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index); +#else +static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { return -ENODEV; } +#endif + +/* Common ARM WFI state */ +#define ARM_CPUIDLE_WFI_STATE_PWR(p) {\ + .enter = arm_cpuidle_simple_enter,\ + .exit_latency = 1,\ + .target_residency = 1,\ + .power_usage = p,\ + .flags = CPUIDLE_FLAG_TIME_VALID,\ + .name = "WFI",\ + .desc = "ARM WFI",\ +} + +/* + * in case power_specified == 1, give a default WFI power value needed + * by some governors + */ +#define ARM_CPUIDLE_WFI_STATE ARM_CPUIDLE_WFI_STATE_PWR(UINT_MAX) + +#endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 43b740d0e374..940c27fde498 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += compat.o 
obj-$(CONFIG_LEDS) += leds.o obj-$(CONFIG_OC_ETM) += etm.o - +obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_ARCH_ACORN) += ecard.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c new file mode 100644 index 000000000000..89545f6c8403 --- /dev/null +++ b/arch/arm/kernel/cpuidle.c @@ -0,0 +1,21 @@ +/* + * Copyright 2012 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include + +int arm_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + cpu_do_idle(); + + return index; +} diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 59f4261c753a..4869b5500234 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -53,6 +53,24 @@ static void cpuidle_kick_cpus(void) {} static int __cpuidle_register_device(struct cpuidle_device *dev); +static inline int cpuidle_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + struct cpuidle_state *target_state = &drv->states[index]; + return target_state->enter(dev, drv, index); +} + +static inline int cpuidle_enter_tk(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + return cpuidle_wrap_enter(dev, drv, index, cpuidle_enter); +} + +typedef int (*cpuidle_enter_t)(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index); + +static cpuidle_enter_t cpuidle_enter_ops; + /** * cpuidle_idle_call - the main idle loop * @@ -63,7 +81,6 @@ int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_driver(); - struct cpuidle_state *target_state; int next_state, 
entered_state; if (off) @@ -92,12 +109,10 @@ int cpuidle_idle_call(void) return 0; } - target_state = &drv->states[next_state]; - trace_power_start(POWER_CSTATE, next_state, dev->cpu); trace_cpu_idle(next_state, dev->cpu); - entered_state = target_state->enter(dev, drv, next_state); + entered_state = cpuidle_enter_ops(dev, drv, next_state); trace_power_end(dev->cpu); trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); @@ -110,6 +125,8 @@ int cpuidle_idle_call(void) dev->states_usage[entered_state].time += (unsigned long long)dev->last_residency; dev->states_usage[entered_state].usage++; + } else { + dev->last_residency = 0; } /* give the governor an opportunity to reflect on the outcome */ @@ -164,6 +181,37 @@ void cpuidle_resume_and_unlock(void) EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); +/** + * cpuidle_wrap_enter - performs timekeeping and irqen around enter function + * @dev: pointer to a valid cpuidle_device object + * @drv: pointer to a valid cpuidle_driver object + * @index: index of the target cpuidle state. 
+ */ +int cpuidle_wrap_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index, + int (*enter)(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index)) +{ + ktime_t time_start, time_end; + s64 diff; + + time_start = ktime_get(); + + index = enter(dev, drv, index); + + time_end = ktime_get(); + + local_irq_enable(); + + diff = ktime_to_us(ktime_sub(time_end, time_start)); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int) diff; + + return index; +} + #ifdef CONFIG_ARCH_HAS_CPU_RELAX static int poll_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -212,10 +260,11 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} int cpuidle_enable_device(struct cpuidle_device *dev) { int ret, i; + struct cpuidle_driver *drv = cpuidle_get_driver(); if (dev->enabled) return 0; - if (!cpuidle_get_driver() || !cpuidle_curr_governor) + if (!drv || !cpuidle_curr_governor) return -EIO; if (!dev->state_count) return -EINVAL; @@ -226,13 +275,16 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return ret; } - poll_idle_init(cpuidle_get_driver()); + cpuidle_enter_ops = drv->en_core_tk_irqen ? + cpuidle_enter_tk : cpuidle_enter; + + poll_idle_init(drv); if ((ret = cpuidle_add_state_sysfs(dev))) return ret; if (cpuidle_curr_governor->enable && - (ret = cpuidle_curr_governor->enable(cpuidle_get_driver(), dev))) + (ret = cpuidle_curr_governor->enable(drv, dev))) goto fail_sysfs; for (i = 0; i < dev->state_count; i++) { diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 712abcc205ae..927db28a2a4c 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -15,6 +15,7 @@ #include #include #include +#include #define CPUIDLE_STATE_MAX 8 #define CPUIDLE_NAME_LEN 16 @@ -122,6 +123,8 @@ struct cpuidle_driver { struct module *owner; unsigned int power_specified:1; + /* set to 1 to use the core cpuidle time keeping (for all states). 
*/ + unsigned int en_core_tk_irqen:1; struct cpuidle_state states[CPUIDLE_STATE_MAX]; int state_count; int safe_state_index; @@ -140,7 +143,10 @@ extern void cpuidle_pause_and_lock(void); extern void cpuidle_resume_and_unlock(void); extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); - +extern int cpuidle_wrap_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index, + int (*enter)(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index)); #else static inline void disable_cpuidle(void) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } @@ -157,6 +163,11 @@ static inline void cpuidle_resume_and_unlock(void) { } static inline int cpuidle_enable_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index, + int (*enter)(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index)) +{ return -ENODEV; } #endif From 7e348b9012522fa0efd854d20d210d5e57fcedd1 Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:43 -0500 Subject: [PATCH 04/47] ARM: at91: Consolidate time keeping and irq enable Enable core cpuidle timekeeping and irq enabling and remove that handling from this code. 
Signed-off-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/arm/mach-at91/cpuidle.c | 67 +++++++++++++----------------------- 1 file changed, 24 insertions(+), 43 deletions(-) diff --git a/arch/arm/mach-at91/cpuidle.c b/arch/arm/mach-at91/cpuidle.c index a851e6c98421..d40b3f317f7f 100644 --- a/arch/arm/mach-at91/cpuidle.c +++ b/arch/arm/mach-at91/cpuidle.c @@ -17,9 +17,10 @@ #include #include #include -#include #include #include +#include +#include #include "pm.h" @@ -27,66 +28,46 @@ static DEFINE_PER_CPU(struct cpuidle_device, at91_cpuidle_device); -static struct cpuidle_driver at91_idle_driver = { - .name = "at91_idle", - .owner = THIS_MODULE, -}; - /* Actual code that puts the SoC in different idle states */ static int at91_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - struct timeval before, after; - int idle_time; u32 saved_lpr; - local_irq_disable(); - do_gettimeofday(&before); - if (index == 0) - /* Wait for interrupt state */ - cpu_do_idle(); - else if (index == 1) { - asm("b 1f; .align 5; 1:"); - asm("mcr p15, 0, r0, c7, c10, 4"); /* drain write buffer */ - saved_lpr = sdram_selfrefresh_enable(); - cpu_do_idle(); - sdram_selfrefresh_disable(saved_lpr); - } - do_gettimeofday(&after); - local_irq_enable(); - idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC + - (after.tv_usec - before.tv_usec); + __asm__("b 1f; .align 5; 1:\n" + " mcr p15, 0, r0, c7, c10, 4"); /* drain write buffer */ + + saved_lpr = sdram_selfrefresh_enable(); + cpu_do_idle(); + sdram_selfrefresh_disable(saved_lpr); - dev->last_residency = idle_time; return index; } +static struct cpuidle_driver at91_idle_driver = { + .name = "at91_idle", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = at91_enter_idle, + .exit_latency = 10, + .target_residency = 100000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "RAM_SR", 
+ .desc = "WFI and DDR Self Refresh", + }, + .state_count = AT91_MAX_STATES, +}; + /* Initialize CPU idle by registering the idle states */ static int at91_init_cpuidle(void) { struct cpuidle_device *device; - struct cpuidle_driver *driver = &at91_idle_driver; device = &per_cpu(at91_cpuidle_device, smp_processor_id()); device->state_count = AT91_MAX_STATES; - driver->state_count = AT91_MAX_STATES; - - /* Wait for interrupt state */ - driver->states[0].enter = at91_enter_idle; - driver->states[0].exit_latency = 1; - driver->states[0].target_residency = 10000; - driver->states[0].flags = CPUIDLE_FLAG_TIME_VALID; - strcpy(driver->states[0].name, "WFI"); - strcpy(driver->states[0].desc, "Wait for interrupt"); - - /* Wait for interrupt and RAM self refresh state */ - driver->states[1].enter = at91_enter_idle; - driver->states[1].exit_latency = 10; - driver->states[1].target_residency = 10000; - driver->states[1].flags = CPUIDLE_FLAG_TIME_VALID; - strcpy(driver->states[1].name, "RAM_SR"); - strcpy(driver->states[1].desc, "WFI and RAM Self Refresh"); cpuidle_register_driver(&at91_idle_driver); From b334648db0ff2d07b00d81cf033c6eddff277680 Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:44 -0500 Subject: [PATCH 05/47] ARM: kirkwood: Consolidate time keeping and irq enable Enable core cpuidle timekeeping and irq enabling and remove that handling from this code. 
Signed-off-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/arm/mach-kirkwood/cpuidle.c | 72 ++++++++++---------------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/arch/arm/mach-kirkwood/cpuidle.c b/arch/arm/mach-kirkwood/cpuidle.c index 7088180b018b..0f1710941878 100644 --- a/arch/arm/mach-kirkwood/cpuidle.c +++ b/arch/arm/mach-kirkwood/cpuidle.c @@ -20,77 +20,47 @@ #include #include #include +#include #include #define KIRKWOOD_MAX_STATES 2 -static struct cpuidle_driver kirkwood_idle_driver = { - .name = "kirkwood_idle", - .owner = THIS_MODULE, -}; - -static DEFINE_PER_CPU(struct cpuidle_device, kirkwood_cpuidle_device); - /* Actual code that puts the SoC in different idle states */ static int kirkwood_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - struct timeval before, after; - int idle_time; - - local_irq_disable(); - do_gettimeofday(&before); - if (index == 0) - /* Wait for interrupt state */ - cpu_do_idle(); - else if (index == 1) { - /* - * Following write will put DDR in self refresh. - * Note that we have 256 cycles before DDR puts it - * self in self-refresh, so the wait-for-interrupt - * call afterwards won't get the DDR from self refresh - * mode. 
- */ - writel(0x7, DDR_OPERATION_BASE); - cpu_do_idle(); - } - do_gettimeofday(&after); - local_irq_enable(); - idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC + - (after.tv_usec - before.tv_usec); - - /* Update last residency */ - dev->last_residency = idle_time; + writel(0x7, DDR_OPERATION_BASE); + cpu_do_idle(); return index; } +static struct cpuidle_driver kirkwood_idle_driver = { + .name = "kirkwood_idle", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = kirkwood_enter_idle, + .exit_latency = 10, + .target_residency = 100000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "DDR SR", + .desc = "WFI and DDR Self Refresh", + }, + .state_count = KIRKWOOD_MAX_STATES, +}; + +static DEFINE_PER_CPU(struct cpuidle_device, kirkwood_cpuidle_device); + /* Initialize CPU idle by registering the idle states */ static int kirkwood_init_cpuidle(void) { struct cpuidle_device *device; - struct cpuidle_driver *driver = &kirkwood_idle_driver; device = &per_cpu(kirkwood_cpuidle_device, smp_processor_id()); device->state_count = KIRKWOOD_MAX_STATES; - driver->state_count = KIRKWOOD_MAX_STATES; - - /* Wait for interrupt state */ - driver->states[0].enter = kirkwood_enter_idle; - driver->states[0].exit_latency = 1; - driver->states[0].target_residency = 10000; - driver->states[0].flags = CPUIDLE_FLAG_TIME_VALID; - strcpy(driver->states[0].name, "WFI"); - strcpy(driver->states[0].desc, "Wait for interrupt"); - - /* Wait for interrupt and DDR self refresh state */ - driver->states[1].enter = kirkwood_enter_idle; - driver->states[1].exit_latency = 10; - driver->states[1].target_residency = 10000; - driver->states[1].flags = CPUIDLE_FLAG_TIME_VALID; - strcpy(driver->states[1].name, "DDR SR"); - strcpy(driver->states[1].desc, "WFI and DDR Self Refresh"); cpuidle_register_driver(&kirkwood_idle_driver); if (cpuidle_register_device(device)) { From 19976c2a88d125aec16b9255c7197c297bbdd637 Mon Sep 17 00:00:00 2001 From: 
Robert Lee Date: Tue, 20 Mar 2012 15:22:45 -0500 Subject: [PATCH 06/47] ARM: davinci: Consolidate time keeping and irq enable Enable core cpuidle timekeeping and irq enabling and remove that handling from this code. Signed-off-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/arm/mach-davinci/cpuidle.c | 82 +++++++++++++-------------------- 1 file changed, 33 insertions(+), 49 deletions(-) diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c index a30c7c5a6d83..93ae096c4ab2 100644 --- a/arch/arm/mach-davinci/cpuidle.c +++ b/arch/arm/mach-davinci/cpuidle.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -30,12 +31,42 @@ struct davinci_ops { u32 flags; }; +/* Actual code that puts the SoC in different idle states */ +static int davinci_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; + struct davinci_ops *ops = cpuidle_get_statedata(state_usage); + + if (ops && ops->enter) + ops->enter(ops->flags); + + index = cpuidle_wrap_enter(dev, drv, index, + arm_cpuidle_simple_enter); + + if (ops && ops->exit) + ops->exit(ops->flags); + + return index; +} + /* fields in davinci_ops.flags */ #define DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN BIT(0) static struct cpuidle_driver davinci_idle_driver = { - .name = "cpuidle-davinci", - .owner = THIS_MODULE, + .name = "cpuidle-davinci", + .owner = THIS_MODULE, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = davinci_enter_idle, + .exit_latency = 10, + .target_residency = 100000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "DDR SR", + .desc = "WFI and DDR Self Refresh", + }, + .state_count = DAVINCI_CPUIDLE_MAX_STATES, }; static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device); @@ -77,41 +108,10 @@ static struct davinci_ops davinci_states[DAVINCI_CPUIDLE_MAX_STATES] = { }, }; -/* 
Actual code that puts the SoC in different idle states */ -static int davinci_enter_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index) -{ - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct davinci_ops *ops = cpuidle_get_statedata(state_usage); - struct timeval before, after; - int idle_time; - - local_irq_disable(); - do_gettimeofday(&before); - - if (ops && ops->enter) - ops->enter(ops->flags); - /* Wait for interrupt state */ - cpu_do_idle(); - if (ops && ops->exit) - ops->exit(ops->flags); - - do_gettimeofday(&after); - local_irq_enable(); - idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC + - (after.tv_usec - before.tv_usec); - - dev->last_residency = idle_time; - - return index; -} - static int __init davinci_cpuidle_probe(struct platform_device *pdev) { int ret; struct cpuidle_device *device; - struct cpuidle_driver *driver = &davinci_idle_driver; struct davinci_cpuidle_config *pdata = pdev->dev.platform_data; device = &per_cpu(davinci_cpuidle_device, smp_processor_id()); @@ -123,27 +123,11 @@ static int __init davinci_cpuidle_probe(struct platform_device *pdev) ddr2_reg_base = pdata->ddr2_ctlr_base; - /* Wait for interrupt state */ - driver->states[0].enter = davinci_enter_idle; - driver->states[0].exit_latency = 1; - driver->states[0].target_residency = 10000; - driver->states[0].flags = CPUIDLE_FLAG_TIME_VALID; - strcpy(driver->states[0].name, "WFI"); - strcpy(driver->states[0].desc, "Wait for interrupt"); - - /* Wait for interrupt and DDR self refresh state */ - driver->states[1].enter = davinci_enter_idle; - driver->states[1].exit_latency = 10; - driver->states[1].target_residency = 10000; - driver->states[1].flags = CPUIDLE_FLAG_TIME_VALID; - strcpy(driver->states[1].name, "DDR SR"); - strcpy(driver->states[1].desc, "WFI and DDR Self Refresh"); if (pdata->ddr2_pdown) davinci_states[1].flags |= DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN; cpuidle_set_statedata(&device->states_usage[1], 
&davinci_states[1]); device->state_count = DAVINCI_CPUIDLE_MAX_STATES; - driver->state_count = DAVINCI_CPUIDLE_MAX_STATES; ret = cpuidle_register_driver(&davinci_idle_driver); if (ret) { From 6da45dce3ce4a1f1f2548c9ad9954d57ab0625b3 Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:46 -0500 Subject: [PATCH 07/47] ARM: omap: Consolidate OMAP3 time keeping and irq enable Use core cpuidle timekeeping and irqen wrapper and remove that handling from this code. Signed-off-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Tested-by: Jean Pihet Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/arm/mach-omap2/cpuidle34xx.c | 42 +++++++++++++------------------ 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 464cffde58fe..535866489ce3 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -87,29 +87,14 @@ static int _cpuidle_deny_idle(struct powerdomain *pwrdm, return 0; } -/** - * omap3_enter_idle - Programs OMAP3 to enter the specified state - * @dev: cpuidle device - * @drv: cpuidle driver - * @index: the index of state to be entered - * - * Called from the CPUidle framework to program the device to the - * specified target state selected by the governor. 
- */ -static int omap3_enter_idle(struct cpuidle_device *dev, +static int __omap3_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct omap3_idle_statedata *cx = cpuidle_get_statedata(&dev->states_usage[index]); - struct timespec ts_preidle, ts_postidle, ts_idle; u32 mpu_state = cx->mpu_state, core_state = cx->core_state; - int idle_time; - /* Used to keep track of the total time in idle */ - getnstimeofday(&ts_preidle); - - local_irq_disable(); local_fiq_disable(); pwrdm_set_next_pwrst(mpu_pd, mpu_state); @@ -148,21 +133,28 @@ static int omap3_enter_idle(struct cpuidle_device *dev, } return_sleep_time: - getnstimeofday(&ts_postidle); - ts_idle = timespec_sub(ts_postidle, ts_preidle); - local_irq_enable(); local_fiq_enable(); - idle_time = ts_idle.tv_nsec / NSEC_PER_USEC + ts_idle.tv_sec * \ - USEC_PER_SEC; - - /* Update cpuidle counters */ - dev->last_residency = idle_time; - return index; } +/** + * omap3_enter_idle - Programs OMAP3 to enter the specified state + * @dev: cpuidle device + * @drv: cpuidle driver + * @index: the index of state to be entered + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. + */ +static inline int omap3_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + return cpuidle_wrap_enter(dev, drv, index, __omap3_enter_idle); +} + /** * next_valid_state - Find next valid C-state * @dev: cpuidle device From d13e9261ac026f90cccac1e6de3978fb18d4af7b Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:47 -0500 Subject: [PATCH 08/47] ARM: omap: Consolidate OMAP4 time keeping and irq enable Enable core cpuidle timekeeping and irq enabling and remove that handling from this code. 
Signed-off-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/arm/mach-omap2/cpuidle44xx.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 72e018b9b260..f386cbe9c889 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -62,15 +62,9 @@ static int omap4_enter_idle(struct cpuidle_device *dev, { struct omap4_idle_statedata *cx = cpuidle_get_statedata(&dev->states_usage[index]); - struct timespec ts_preidle, ts_postidle, ts_idle; u32 cpu1_state; - int idle_time; int cpu_id = smp_processor_id(); - /* Used to keep track of the total time in idle */ - getnstimeofday(&ts_preidle); - - local_irq_disable(); local_fiq_disable(); /* @@ -128,26 +122,17 @@ static int omap4_enter_idle(struct cpuidle_device *dev, if (index > 0) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id); - getnstimeofday(&ts_postidle); - ts_idle = timespec_sub(ts_postidle, ts_preidle); - - local_irq_enable(); local_fiq_enable(); - idle_time = ts_idle.tv_nsec / NSEC_PER_USEC + ts_idle.tv_sec * \ - USEC_PER_SEC; - - /* Update cpuidle counters */ - dev->last_residency = idle_time; - return index; } DEFINE_PER_CPU(struct cpuidle_device, omap4_idle_dev); struct cpuidle_driver omap4_idle_driver = { - .name = "omap4_idle", - .owner = THIS_MODULE, + .name = "omap4_idle", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, }; static inline void _fill_cstate(struct cpuidle_driver *drv, From ee807dd89c0003a5cc0ec961132cd83542e5c30c Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:48 -0500 Subject: [PATCH 09/47] ARM: shmobile: Consolidate time keeping and irq enable Enable core cpuidle timekeeping and irq enabling and remove that handling from this code. 
Signed-off-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/arm/mach-shmobile/cpuidle.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/arch/arm/mach-shmobile/cpuidle.c b/arch/arm/mach-shmobile/cpuidle.c index 1b2334277e85..ca23b202b02d 100644 --- a/arch/arm/mach-shmobile/cpuidle.c +++ b/arch/arm/mach-shmobile/cpuidle.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static void shmobile_enter_wfi(void) @@ -29,37 +30,19 @@ static int shmobile_cpuidle_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - ktime_t before, after; - - before = ktime_get(); - - local_irq_disable(); - local_fiq_disable(); - shmobile_cpuidle_modes[index](); - local_irq_enable(); - local_fiq_enable(); - - after = ktime_get(); - dev->last_residency = ktime_to_ns(ktime_sub(after, before)) >> 10; - return index; } static struct cpuidle_device shmobile_cpuidle_dev; static struct cpuidle_driver shmobile_cpuidle_driver = { - .name = "shmobile_cpuidle", - .owner = THIS_MODULE, - .states[0] = { - .name = "C1", - .desc = "WFI", - .exit_latency = 1, - .target_residency = 1 * 2, - .flags = CPUIDLE_FLAG_TIME_VALID, - }, - .safe_state_index = 0, /* C1 */ - .state_count = 1, + .name = "shmobile_cpuidle", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .safe_state_index = 0, /* C1 */ + .state_count = 1, }; void (*shmobile_cpuidle_setup)(struct cpuidle_driver *drv); From 5c48c873baf1a67b93d26770c8fe4d41f26f48af Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:49 -0500 Subject: [PATCH 10/47] SH: shmobile: Consolidate time keeping and irq enable Enable core cpuidle timekeeping and irq enabling and remove that handling from this code. 
Signed-off-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Acked-by: Jean Pihet Signed-off-by: Len Brown --- arch/sh/kernel/cpu/shmobile/cpuidle.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c index 6d62eb40e750..1ddc876d3b26 100644 --- a/arch/sh/kernel/cpu/shmobile/cpuidle.c +++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c @@ -29,7 +29,6 @@ static int cpuidle_sleep_enter(struct cpuidle_device *dev, int index) { unsigned long allowed_mode = SUSP_SH_SLEEP; - ktime_t before, after; int requested_state = index; int allowed_state; int k; @@ -47,19 +46,16 @@ static int cpuidle_sleep_enter(struct cpuidle_device *dev, */ k = min_t(int, allowed_state, requested_state); - before = ktime_get(); sh_mobile_call_standby(cpuidle_mode[k]); - after = ktime_get(); - - dev->last_residency = (int)ktime_to_ns(ktime_sub(after, before)) >> 10; return k; } static struct cpuidle_device cpuidle_dev; static struct cpuidle_driver cpuidle_driver = { - .name = "sh_idle", - .owner = THIS_MODULE, + .name = "sh_idle", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, }; void sh_mobile_setup_cpuidle(void) From b11de07ce561574b6e03c8192b28bad540da8f79 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 21 Mar 2012 12:55:00 -0700 Subject: [PATCH 11/47] drivers/thermal/thermal_sys.c: fix build warning With CONFIG_NET=n: drivers/thermal/thermal_sys.c:63: warning: 'thermal_event_seqnum' defined but not used Move 'thermal_event_seqnum' definition inside the '#ifdef CONFIG_NET' [akpm@linux-foundation.org: make thermal_event_seqnum local to generate_netlink_event()] Signed-off-by: Fabio Estevam Acked-by: Guenter Roeck Acked-by: Durgadoss R Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 
220ce7e31cf5..859b80b6d376 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -60,8 +60,6 @@ static LIST_HEAD(thermal_tz_list); static LIST_HEAD(thermal_cdev_list); static DEFINE_MUTEX(thermal_list_lock); -static unsigned int thermal_event_seqnum; - static int get_idr(struct idr *idr, struct mutex *lock, int *id) { int err; @@ -1312,6 +1310,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) void *msg_header; int size; int result; + static unsigned int thermal_event_seqnum; /* allocate memory */ size = nla_total_size(sizeof(struct thermal_genl_event)) + \ From 886ee5463530036f6171e1376118e7014cf33f7f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Mar 2012 12:55:01 -0700 Subject: [PATCH 12/47] thermal_sys: remove unnecessary line continuations Line continuations are not necessary in function calls or statements. Remove them. Signed-off-by: Joe Perches Reviewed-by: Jesper Juhl Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 859b80b6d376..71802caec2c3 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -281,8 +281,7 @@ passive_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(type, 0444, type_show, NULL); static DEVICE_ATTR(temp, 0444, temp_show, NULL); static DEVICE_ATTR(mode, 0644, mode_show, mode_store); -static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, \ - passive_store); +static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, passive_store); static struct device_attribute trip_point_attrs[] = { __ATTR(trip_point_0_type, 0444, trip_point_type_show, NULL), @@ -1313,8 +1312,8 @@ int thermal_generate_netlink_event(u32 orig, enum events event) static unsigned int thermal_event_seqnum; /* allocate memory */ - size = nla_total_size(sizeof(struct 
thermal_genl_event)) + \ - nla_total_size(0); + size = nla_total_size(sizeof(struct thermal_genl_event)) + + nla_total_size(0); skb = genlmsg_new(size, GFP_ATOMIC); if (!skb) @@ -1330,8 +1329,8 @@ int thermal_generate_netlink_event(u32 orig, enum events event) } /* fill the data */ - attr = nla_reserve(skb, THERMAL_GENL_ATTR_EVENT, \ - sizeof(struct thermal_genl_event)); + attr = nla_reserve(skb, THERMAL_GENL_ATTR_EVENT, + sizeof(struct thermal_genl_event)); if (!attr) { nlmsg_free(skb); From ec797685609da142588012d734e85d14cff9c7d2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Mar 2012 12:55:02 -0700 Subject: [PATCH 13/47] thermal_sys: remove obfuscating used-once macros These don't add any value as they are used only once and the surrounding code uses similar variable. Signed-off-by: Joe Perches Cc: Jesper Juhl Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 71802caec2c3..11237bc2aa9c 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -310,22 +310,6 @@ static struct device_attribute trip_point_attrs[] = { __ATTR(trip_point_11_temp, 0444, trip_point_temp_show, NULL), }; -#define TRIP_POINT_ATTR_ADD(_dev, _index, result) \ -do { \ - result = device_create_file(_dev, \ - &trip_point_attrs[_index * 2]); \ - if (result) \ - break; \ - result = device_create_file(_dev, \ - &trip_point_attrs[_index * 2 + 1]); \ -} while (0) - -#define TRIP_POINT_ATTR_REMOVE(_dev, _index) \ -do { \ - device_remove_file(_dev, &trip_point_attrs[_index * 2]); \ - device_remove_file(_dev, &trip_point_attrs[_index * 2 + 1]); \ -} while (0) - /* sys I/F for cooling device */ #define to_cooling_device(_dev) \ container_of(_dev, struct thermal_cooling_device, device) @@ -1196,7 +1180,12 @@ struct thermal_zone_device 
*thermal_zone_device_register(char *type, } for (count = 0; count < trips; count++) { - TRIP_POINT_ATTR_ADD(&tz->device, count, result); + result = device_create_file(&tz->device, + &trip_point_attrs[count * 2]); + if (result) + break; + result = device_create_file(&tz->device, + &trip_point_attrs[count * 2 + 1]); if (result) goto unregister; tz->ops->get_trip_type(tz, count, &trip_type); @@ -1276,9 +1265,12 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) if (tz->ops->get_mode) device_remove_file(&tz->device, &dev_attr_mode); - for (count = 0; count < tz->trips; count++) - TRIP_POINT_ATTR_REMOVE(&tz->device, count); - + for (count = 0; count < tz->trips; count++) { + device_remove_file(&tz->device, + &trip_point_attrs[count * 2]); + device_remove_file(&tz->device, + &trip_point_attrs[count * 2 + 1]); + } thermal_remove_hwmon_sysfs(tz); release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); idr_destroy(&tz->idr); From caca8b803520b0694423e2ac0ee3d58650b04a12 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Mar 2012 12:55:02 -0700 Subject: [PATCH 14/47] thermal_sys: kernel style cleanups Just a few tidies to make it more like most kernel sources. A couple of long lines still remain. 
Signed-off-by: Joe Perches Reviewed-by: Jesper Juhl Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 11237bc2aa9c..db5d8f882668 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -64,7 +64,7 @@ static int get_idr(struct idr *idr, struct mutex *lock, int *id) { int err; - again: +again: if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) return -ENOMEM; @@ -816,15 +816,14 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, return 0; device_remove_file(&tz->device, &dev->attr); - remove_symbol_link: +remove_symbol_link: sysfs_remove_link(&tz->device.kobj, dev->name); - release_idr: +release_idr: release_idr(&tz->idr, &tz->lock, dev->id); - free_mem: +free_mem: kfree(dev); return result; } - EXPORT_SYMBOL(thermal_zone_bind_cooling_device); /** @@ -854,14 +853,13 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, return -ENODEV; - unbind: +unbind: device_remove_file(&tz->device, &pos->attr); sysfs_remove_link(&tz->device.kobj, pos->name); release_idr(&tz->idr, &tz->lock, pos->id); kfree(pos); return 0; } - EXPORT_SYMBOL(thermal_zone_unbind_cooling_device); static void thermal_release(struct device *dev) @@ -869,7 +867,8 @@ static void thermal_release(struct device *dev) struct thermal_zone_device *tz; struct thermal_cooling_device *cdev; - if (!strncmp(dev_name(dev), "thermal_zone", sizeof "thermal_zone" - 1)) { + if (!strncmp(dev_name(dev), "thermal_zone", + sizeof("thermal_zone") - 1)) { tz = to_thermal_zone(dev); kfree(tz); } else { @@ -889,8 +888,9 @@ static struct class thermal_class = { * @devdata: device private data. * @ops: standard thermal cooling devices callbacks. 
*/ -struct thermal_cooling_device *thermal_cooling_device_register( - char *type, void *devdata, const struct thermal_cooling_device_ops *ops) +struct thermal_cooling_device * +thermal_cooling_device_register(char *type, void *devdata, + const struct thermal_cooling_device_ops *ops) { struct thermal_cooling_device *cdev; struct thermal_zone_device *pos; @@ -955,12 +955,11 @@ struct thermal_cooling_device *thermal_cooling_device_register( if (!result) return cdev; - unregister: +unregister: release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id); device_unregister(&cdev->device); return ERR_PTR(result); } - EXPORT_SYMBOL(thermal_cooling_device_register); /** @@ -1005,7 +1004,6 @@ void thermal_cooling_device_unregister(struct device_unregister(&cdev->device); return; } - EXPORT_SYMBOL(thermal_cooling_device_unregister); /** @@ -1081,7 +1079,7 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) tz->last_temperature = temp; - leave: +leave: if (tz->passive) thermal_zone_device_set_polling(tz, tz->passive_delay); else if (tz->polling_delay) @@ -1221,12 +1219,11 @@ struct thermal_zone_device *thermal_zone_device_register(char *type, if (!result) return tz; - unregister: +unregister: release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); device_unregister(&tz->device); return ERR_PTR(result); } - EXPORT_SYMBOL(thermal_zone_device_register); /** @@ -1278,7 +1275,6 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) device_unregister(&tz->device); return; } - EXPORT_SYMBOL(thermal_zone_device_unregister); #ifdef CONFIG_NET From c5a01dd52dc4903772f464ea580895ccc36e911d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Mar 2012 12:55:02 -0700 Subject: [PATCH 15/47] thermal_sys: convert printks to pr_ Use the current logging style. Remove PREFIX, add pr_fmt, convert the printks. All dmesg output now prefixed with "thermal_sys: ". 
Signed-off-by: Joe Perches Cc: Jesper Juhl Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index db5d8f882668..fab970d9e3e1 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -23,6 +23,8 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -39,8 +41,6 @@ MODULE_AUTHOR("Zhang Rui"); MODULE_DESCRIPTION("Generic thermal management sysfs support"); MODULE_LICENSE("GPL"); -#define PREFIX "Thermal: " - struct thermal_cooling_device_instance { int id; char name[THERMAL_NAME_LENGTH]; @@ -1023,8 +1023,7 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) if (tz->ops->get_temp(tz, &temp)) { /* get_temp failed - retry it later */ - printk(KERN_WARNING PREFIX "failed to read out thermal zone " - "%d\n", tz->id); + pr_warn("failed to read out thermal zone %d\n", tz->id); goto leave; } @@ -1039,9 +1038,8 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) ret = tz->ops->notify(tz, count, trip_type); if (!ret) { - printk(KERN_EMERG - "Critical temperature reached (%ld C), shutting down.\n", - temp/1000); + pr_emerg("Critical temperature reached (%ld C), shutting down\n", + temp/1000); orderly_poweroff(true); } } @@ -1345,7 +1343,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC); if (result) - printk(KERN_INFO "failed to send netlink event:%d", result); + pr_info("failed to send netlink event:%d\n", result); return result; } From 6a92c36688bd6d8e68e19ca9b5e41e8197921b59 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 21 Mar 2012 12:55:03 -0700 Subject: [PATCH 16/47] thermal: add support for thermal sensor present on 
SPEAr13xx machines ST's SPEAr13xx machines are based on CortexA9 ARM processors. These machines contain a thermal sensor for junction temperature monitoring. This patch adds support for this thermal sensor in existing thermal framework. [akpm@linux-foundation.org: little code cleanup] [akpm@linux-foundation.org: print the pointer correctly] [viresh.kumar@st.com: thermal/spear_thermal: add compilation dependency on PLAT_SPEAR] Signed-off-by: Vincenzo Frascino Signed-off-by: Viresh Kumar Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/Kconfig | 8 + drivers/thermal/Makefile | 1 + drivers/thermal/spear_thermal.c | 206 ++++++++++++++++++++ include/linux/platform_data/spear_thermal.h | 26 +++ 4 files changed, 241 insertions(+) create mode 100644 drivers/thermal/spear_thermal.c create mode 100644 include/linux/platform_data/spear_thermal.h diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index f7f71b2d3101..514a691abea0 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -18,3 +18,11 @@ config THERMAL_HWMON depends on THERMAL depends on HWMON=y || HWMON=THERMAL default y + +config SPEAR_THERMAL + bool "SPEAr thermal sensor driver" + depends on THERMAL + depends on PLAT_SPEAR + help + Enable this to plug the SPEAr thermal sensor driver into the Linux + thermal framework diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 31108a01c22e..a9fff0bf4b14 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_THERMAL) += thermal_sys.o +obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o \ No newline at end of file diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c new file mode 100644 index 000000000000..880bf289aa64 --- /dev/null +++ b/drivers/thermal/spear_thermal.c @@ -0,0 +1,206 @@ +/* + * SPEAr thermal driver. 
+ * + * Copyright (C) 2011-2012 ST Microelectronics + * Author: Vincenzo Frascino + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MD_FACTOR 1000 + +/* SPEAr Thermal Sensor Dev Structure */ +struct spear_thermal_dev { + /* pointer to base address of the thermal sensor */ + void __iomem *thermal_base; + /* clk structure */ + struct clk *clk; + /* pointer to thermal flags */ + unsigned int flags; +}; + +static inline int thermal_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + struct spear_thermal_dev *stdev = thermal->devdata; + + /* + * Data are ready to be read after 628 usec from POWERDOWN signal + * (PDN) = 1 + */ + *temp = (readl(stdev->thermal_base) & 0x7F) * MD_FACTOR; + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = thermal_get_temp, +}; + +#ifdef CONFIG_PM +static int spear_thermal_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct thermal_zone_device *spear_thermal = platform_get_drvdata(pdev); + struct spear_thermal_dev *stdev = spear_thermal->devdata; + unsigned int actual_mask = 0; + + /* Disable SPEAr Thermal Sensor */ + actual_mask = readl(stdev->thermal_base); + writel(actual_mask & ~stdev->flags, stdev->thermal_base); + + clk_disable(stdev->clk); + dev_info(dev, "Suspended.\n"); + + return 0; +} + +static int spear_thermal_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct 
thermal_zone_device *spear_thermal = platform_get_drvdata(pdev); + struct spear_thermal_dev *stdev = spear_thermal->devdata; + unsigned int actual_mask = 0; + int ret = 0; + + ret = clk_enable(stdev->clk); + if (ret) { + dev_err(&pdev->dev, "Can't enable clock\n"); + return ret; + } + + /* Enable SPEAr Thermal Sensor */ + actual_mask = readl(stdev->thermal_base); + writel(actual_mask | stdev->flags, stdev->thermal_base); + + dev_info(dev, "Resumed.\n"); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(spear_thermal_pm_ops, spear_thermal_suspend, + spear_thermal_resume); + +static int spear_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *spear_thermal = NULL; + struct spear_thermal_dev *stdev; + struct spear_thermal_pdata *pdata; + int ret = 0; + struct resource *stres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + if (!stres) { + dev_err(&pdev->dev, "memory resource missing\n"); + return -ENODEV; + } + + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "platform data is NULL\n"); + return -EINVAL; + } + + stdev = devm_kzalloc(&pdev->dev, sizeof(*stdev), GFP_KERNEL); + if (!stdev) { + dev_err(&pdev->dev, "kzalloc fail\n"); + return -ENOMEM; + } + + /* Enable thermal sensor */ + stdev->thermal_base = devm_ioremap(&pdev->dev, stres->start, + resource_size(stres)); + if (!stdev->thermal_base) { + dev_err(&pdev->dev, "ioremap failed\n"); + return -ENOMEM; + } + + stdev->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(stdev->clk)) { + dev_err(&pdev->dev, "Can't get clock\n"); + return PTR_ERR(stdev->clk); + } + + ret = clk_enable(stdev->clk); + if (ret) { + dev_err(&pdev->dev, "Can't enable clock\n"); + goto put_clk; + } + + stdev->flags = pdata->thermal_flags; + writel(stdev->flags, stdev->thermal_base); + + spear_thermal = thermal_zone_device_register("spear_thermal", 0, + stdev, &ops, 0, 0, 0, 0); + if (!spear_thermal) { + dev_err(&pdev->dev, "thermal zone device is NULL\n"); + ret = -EINVAL; + goto 
disable_clk; + } + + platform_set_drvdata(pdev, spear_thermal); + + dev_info(&spear_thermal->device, "Thermal Sensor Loaded at: 0x%p.\n", + stdev->thermal_base); + + return 0; + +disable_clk: + clk_disable(stdev->clk); +put_clk: + clk_put(stdev->clk); + + return ret; +} + +static int spear_thermal_exit(struct platform_device *pdev) +{ + unsigned int actual_mask = 0; + struct thermal_zone_device *spear_thermal = platform_get_drvdata(pdev); + struct spear_thermal_dev *stdev = spear_thermal->devdata; + + thermal_zone_device_unregister(spear_thermal); + platform_set_drvdata(pdev, NULL); + + /* Disable SPEAr Thermal Sensor */ + actual_mask = readl(stdev->thermal_base); + writel(actual_mask & ~stdev->flags, stdev->thermal_base); + + clk_disable(stdev->clk); + clk_put(stdev->clk); + + return 0; +} + +static struct platform_driver spear_thermal_driver = { + .probe = spear_thermal_probe, + .remove = spear_thermal_exit, + .driver = { + .name = "spear_thermal", + .owner = THIS_MODULE, + .pm = &spear_thermal_pm_ops, + }, +}; + +module_platform_driver(spear_thermal_driver); + +MODULE_AUTHOR("Vincenzo Frascino "); +MODULE_DESCRIPTION("SPEAr thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/spear_thermal.h b/include/linux/platform_data/spear_thermal.h new file mode 100644 index 000000000000..724f2e1cbbcb --- /dev/null +++ b/include/linux/platform_data/spear_thermal.h @@ -0,0 +1,26 @@ +/* + * SPEAr thermal driver platform data. + * + * Copyright (C) 2011-2012 ST Microelectronics + * Author: Vincenzo Frascino + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ +#ifndef SPEAR_THERMAL_H +#define SPEAR_THERMAL_H + +/* SPEAr Thermal Sensor Platform Data */ +struct spear_thermal_pdata { + /* flags used to enable thermal sensor */ + unsigned int thermal_flags; +}; + +#endif /* SPEAR_THERMAL_H */ From de716e32e61fae5d1f0d000008d3f641cec5c9dd Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 21 Mar 2012 12:55:03 -0700 Subject: [PATCH 17/47] thermal/spear_thermal: replace readl/writel with lighter _relaxed variants readl/writel versions for ARM contain memory barrier instruction for synchronizing DMA buffers. These are not required at least on this module. So use lighter _relaxed variants. Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/spear_thermal.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index 880bf289aa64..be94413b95a3 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -46,7 +46,7 @@ static inline int thermal_get_temp(struct thermal_zone_device *thermal, * Data are ready to be read after 628 usec from POWERDOWN signal * (PDN) = 1 */ - *temp = (readl(stdev->thermal_base) & 0x7F) * MD_FACTOR; + *temp = (readl_relaxed(stdev->thermal_base) & 0x7F) * MD_FACTOR; return 0; } @@ -63,8 +63,8 @@ static int spear_thermal_suspend(struct device *dev) unsigned int actual_mask = 0; /* Disable SPEAr Thermal Sensor */ - actual_mask = readl(stdev->thermal_base); - writel(actual_mask & ~stdev->flags, stdev->thermal_base); + actual_mask = readl_relaxed(stdev->thermal_base); + writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base); clk_disable(stdev->clk); dev_info(dev, "Suspended.\n"); @@ -87,8 +87,8 @@ static int spear_thermal_resume(struct device *dev) } /* Enable SPEAr Thermal Sensor */ - actual_mask = readl(stdev->thermal_base); - writel(actual_mask | stdev->flags, 
stdev->thermal_base); + actual_mask = readl_relaxed(stdev->thermal_base); + writel_relaxed(actual_mask | stdev->flags, stdev->thermal_base); dev_info(dev, "Resumed.\n"); @@ -145,7 +145,7 @@ static int spear_thermal_probe(struct platform_device *pdev) } stdev->flags = pdata->thermal_flags; - writel(stdev->flags, stdev->thermal_base); + writel_relaxed(stdev->flags, stdev->thermal_base); spear_thermal = thermal_zone_device_register("spear_thermal", 0, stdev, &ops, 0, 0, 0, 0); @@ -180,8 +180,8 @@ static int spear_thermal_exit(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); /* Disable SPEAr Thermal Sensor */ - actual_mask = readl(stdev->thermal_base); - writel(actual_mask & ~stdev->flags, stdev->thermal_base); + actual_mask = readl_relaxed(stdev->thermal_base); + writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base); clk_disable(stdev->clk); clk_put(stdev->clk); From 03ee62f0b9919535a1be02f72fe8153255a7fda0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Mar 2012 12:55:04 -0700 Subject: [PATCH 18/47] thermal: spear13xx: checking for NULL instead of IS_ERR() thermal_zone_device_register() never returns NULL, on error it returns an ERR_PTR().
Signed-off-by: Dan Carpenter Reviewed-by: Viresh Kumar Reviewed-by: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/spear_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index be94413b95a3..c2e32df3b164 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -149,9 +149,9 @@ static int spear_thermal_probe(struct platform_device *pdev) spear_thermal = thermal_zone_device_register("spear_thermal", 0, stdev, &ops, 0, 0, 0, 0); - if (!spear_thermal) { + if (IS_ERR(spear_thermal)) { dev_err(&pdev->dev, "thermal zone device is NULL\n"); - ret = -EINVAL; + ret = PTR_ERR(spear_thermal); goto disable_clk; } From f1f0e2ac596f531c15b7b09ebeb8cfd011fffbd2 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Wed, 21 Mar 2012 16:40:01 +0530 Subject: [PATCH 19/47] thermal: Fix for setting the thermal zone mode to enable/disable Basically without this patch changing the mode of thermal zone is not possible as wrong string size is passed to strncmp. 
Signed-off-by: Amit Daniel Kachhap Acked-by: Jean Delvare Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index fab970d9e3e1..022bacb71a7e 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -150,9 +150,9 @@ mode_store(struct device *dev, struct device_attribute *attr, if (!tz->ops->set_mode) return -EPERM; - if (!strncmp(buf, "enabled", sizeof("enabled"))) + if (!strncmp(buf, "enabled", sizeof("enabled") - 1)) result = tz->ops->set_mode(tz, THERMAL_DEVICE_ENABLED); - else if (!strncmp(buf, "disabled", sizeof("disabled"))) + else if (!strncmp(buf, "disabled", sizeof("disabled") - 1)) result = tz->ops->set_mode(tz, THERMAL_DEVICE_DISABLED); else result = -EINVAL; From 6a6ea0acc9375571a13aa8c4e105a0807e1c16a4 Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Wed, 21 Mar 2012 11:48:25 -0500 Subject: [PATCH 20/47] ARM: davinci: Fix for cpuidle consolidation changes The recent cpuidle consolidation changes erroneously omitted one critical line of code. 
Signed-off-by: Robert Lee Tested-by: Sekhar Nori Acked-by: Sekhar Nori Signed-off-by: Len Brown --- arch/arm/mach-davinci/cpuidle.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c index 93ae096c4ab2..9107691adbdb 100644 --- a/arch/arm/mach-davinci/cpuidle.c +++ b/arch/arm/mach-davinci/cpuidle.c @@ -55,10 +55,11 @@ static int davinci_enter_idle(struct cpuidle_device *dev, #define DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN BIT(0) static struct cpuidle_driver davinci_idle_driver = { - .name = "cpuidle-davinci", - .owner = THIS_MODULE, - .states[0] = ARM_CPUIDLE_WFI_STATE, - .states[1] = { + .name = "cpuidle-davinci", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { .enter = davinci_enter_idle, .exit_latency = 10, .target_residency = 100000, From 2815ab92ba3ab27556212cc306288dc95692824b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 6 Feb 2012 08:17:11 -0800 Subject: [PATCH 21/47] ACPI: Do cpufreq clamping for throttling per package v2 On Intel CPUs the processor typically uses the highest frequency set by any logical CPU. When the system overheats Linux first forces the frequency to the lowest available one to lower the temperature. However this was done only per logical CPU, which means all logical CPUs in a package would need to go through this before the frequency is actually lowered. Worse this delay actually prevents real throttling, because the real throttle code only proceeds when the lowest frequency is already reached. So when a throttle event happens force the lowest frequency for all CPUs in the package where it happened. The per CPU state is now kept per package, not per logical CPU. An alternative would be to do it per cpufreq unit, but since we want to bring down the temperature of the complete chip it's better to do it for all. 
In principle it may even make sense to do it for all CPUs, but I kept it on the package for now. With this change the frequency is actually lowered, which in turn also allows real throttling to proceed. I also removed an unnecessary per cpu variable initialization. v2: Fix package mapping Cc: Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- drivers/acpi/processor_thermal.c | 45 ++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 3b599abf2b40..641b5450a0db 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -57,6 +57,27 @@ ACPI_MODULE_NAME("processor_thermal"); static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg); static unsigned int acpi_thermal_cpufreq_is_init = 0; +#define reduction_pctg(cpu) \ + per_cpu(cpufreq_thermal_reduction_pctg, phys_package_first_cpu(cpu)) + +/* + * Emulate "per package data" using per cpu data (which should really be + * provided elsewhere) + * + * Note we can lose a CPU on cpu hotunplug, in this case we forget the state + * temporarily.
Fortunately that's not a big issue here (I hope) + */ +static int phys_package_first_cpu(int cpu) +{ + int i; + int id = topology_physical_package_id(cpu); + + for_each_online_cpu(i) + if (topology_physical_package_id(i) == id) + return i; + return 0; +} + static int cpu_has_cpufreq(unsigned int cpu) { struct cpufreq_policy policy; @@ -76,7 +97,7 @@ static int acpi_thermal_cpufreq_notifier(struct notifier_block *nb, max_freq = ( policy->cpuinfo.max_freq * - (100 - per_cpu(cpufreq_thermal_reduction_pctg, policy->cpu) * 20) + (100 - reduction_pctg(policy->cpu) * 20) ) / 100; cpufreq_verify_within_limits(policy, 0, max_freq); @@ -102,16 +123,28 @@ static int cpufreq_get_cur_state(unsigned int cpu) if (!cpu_has_cpufreq(cpu)) return 0; - return per_cpu(cpufreq_thermal_reduction_pctg, cpu); + return reduction_pctg(cpu); } static int cpufreq_set_cur_state(unsigned int cpu, int state) { + int i; + if (!cpu_has_cpufreq(cpu)) return 0; - per_cpu(cpufreq_thermal_reduction_pctg, cpu) = state; - cpufreq_update_policy(cpu); + reduction_pctg(cpu) = state; + + /* + * Update all the CPUs in the same package because they all + * contribute to the temperature and often share the same + * frequency. + */ + for_each_online_cpu(i) { + if (topology_physical_package_id(i) == + topology_physical_package_id(cpu)) + cpufreq_update_policy(i); + } return 0; } @@ -119,10 +152,6 @@ void acpi_thermal_cpufreq_init(void) { int i; - for (i = 0; i < nr_cpu_ids; i++) - if (cpu_present(i)) - per_cpu(cpufreq_thermal_reduction_pctg, i) = 0; - i = cpufreq_register_notifier(&acpi_thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); if (!i) From e23da0370f80834e971142e50253f5b79be83631 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 6 Feb 2012 18:37:16 -0500 Subject: [PATCH 22/47] tools turbostat: add summary option turbostat -s cuts down on the amount of output, per user request. also tweak some output whitespace and the man page.
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 99 ++++++++++++++++----------- tools/power/x86/turbostat/turbostat.c | 90 ++++++++++++++++-------- 2 files changed, 120 insertions(+), 69 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 555c69a5592a..adf175f61496 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -4,11 +4,13 @@ turbostat \- Report processor frequency and idle statistics .SH SYNOPSIS .ft B .B turbostat +.RB [ "\-s" ] .RB [ "\-v" ] .RB [ "\-M MSR#" ] .RB command .br .B turbostat +.RB [ "\-s" ] .RB [ "\-v" ] .RB [ "\-M MSR#" ] .RB [ "\-i interval_sec" ] @@ -25,6 +27,8 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. on processors that additionally support C-state residency counters. .SS Options +The \fB-s\fP option prints only a 1-line summary for each sample interval. +.PP The \fB-v\fP option increases verbosity. .PP The \fB-M MSR#\fP option dumps the specified MSR, @@ -39,13 +43,14 @@ displays the statistics gathered since it was forked. .SH FIELD DESCRIPTIONS .nf \fBpk\fP processor package number. -\fBcr\fP processor core number. +\fBcor\fP processor core number. \fBCPU\fP Linux CPU (logical processor) number. +Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. \fB%c0\fP percent of the interval that the CPU retired instructions. \fBGHz\fP average clock rate while the CPU was in c0 state. \fBTSC\fP average GHz that the TSC ran during the entire interval. -\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states. -\fB%pc3, %pc6\fP percentage residency in hardware package idle states. +\fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. +\fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. 
.fi .PP .SH EXAMPLE @@ -53,25 +58,37 @@ Without any parameters, turbostat prints out counters ever 5 seconds. (override interval with "-i sec" option, or specify a command for turbostat to fork). -The first row of statistics reflect the average for the entire system. +The first row of statistics is a summary for the entire system. +Note that the summary is a weighted average. Subsequent rows show per-CPU statistics. .nf [root@x980]# ./turbostat -cr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07 - 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07 - 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07 - 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07 - 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07 - 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07 - 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07 - 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07 - 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07 - 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 - 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 - 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07 - 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07 +cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 + 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 + 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 + 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 + 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 + 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 + 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 + 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 + 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 + 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 + 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 + 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 + 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 +.fi +.SH SUMMARY EXAMPLE +The "-s" option prints the column headers just once, +and then the one line system summary for each sample interval. 
+ +.nf +[root@x980]# ./turbostat -s + %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 + 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 + 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 .fi .SH VERBOSE EXAMPLE The "-v" option adds verbosity to the output: @@ -101,33 +118,33 @@ until ^C while the other CPUs are mostly idle: .nf [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null - -^Ccr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00 - 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00 - 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00 - 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00 - 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00 - 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00 - 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00 - 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00 - 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00 - 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00 - 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00 - 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00 - 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00 -6.950866 sec +^C +cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 + 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 + 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 + 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 + 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 + 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 + 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 + 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 + 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 + 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 + 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 + 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 + 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 +4.907015 sec .fi -Above the cycle soaker drives cpu9 up 3.6 Ghz turbo limit +Above the cycle soaker drives cpu6 up 3.6 Ghz turbo limit while the other processors are generally in various states of idle. 
-Note that cpu3 is an HT sibling sharing core9 -with cpu9, and thus it is unable to get to an idle state -deeper than c1 while cpu9 is busy. +Note that cpu0 is an HT sibling sharing core0 +with cpu6, and thus it is unable to get to an idle state +deeper than c1 while cpu6 is busy. -Note that turbostat reports average GHz of 3.61, while -the arithmetic average of the GHz column above is 3.24. +Note that turbostat reports average GHz of 3.64, while +the arithmetic average of the GHz column above is lower. This is a weighted average, where the weight is %c0. ie. it is the total number of un-halted cycles elapsed per time divided by the number of CPUs. .SH NOTES @@ -167,6 +184,6 @@ http://www.intel.com/products/processor/manuals/ .SH "SEE ALSO" msr(4), vmstat(8) .PP -.SH AUTHORS +.SH AUTHOR .nf Written by Len Brown diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 310d3dd5e547..6436d54378c7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2,7 +2,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel turbo-capable processors. * - * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2012 Intel Corporation. 
* Len Brown * * This program is free software; you can redistribute it and/or modify it @@ -49,6 +49,7 @@ char *proc_stat = "/proc/stat"; unsigned int interval_sec = 5; /* set with -i interval_sec */ unsigned int verbose; /* set with -v */ +unsigned int summary_only; /* set with -s */ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; @@ -129,14 +130,18 @@ void print_header(void) { if (show_pkg) fprintf(stderr, "pk"); + if (show_pkg) + fprintf(stderr, " "); if (show_core) - fprintf(stderr, " cr"); + fprintf(stderr, "cor"); if (show_cpu) fprintf(stderr, " CPU"); + if (show_pkg || show_core || show_cpu) + fprintf(stderr, " "); if (do_nhm_cstates) - fprintf(stderr, " %%c0 "); + fprintf(stderr, " %%c0"); if (has_aperf) - fprintf(stderr, " GHz"); + fprintf(stderr, " GHz"); fprintf(stderr, " TSC"); if (do_nhm_cstates) fprintf(stderr, " %%c1"); @@ -147,13 +152,13 @@ void print_header(void) if (do_snb_cstates) fprintf(stderr, " %%c7"); if (do_snb_cstates) - fprintf(stderr, " %%pc2"); + fprintf(stderr, " %%pc2"); if (do_nhm_cstates) - fprintf(stderr, " %%pc3"); + fprintf(stderr, " %%pc3"); if (do_nhm_cstates) - fprintf(stderr, " %%pc6"); + fprintf(stderr, " %%pc6"); if (do_snb_cstates) - fprintf(stderr, " %%pc7"); + fprintf(stderr, " %%pc7"); if (extra_msr_offset) fprintf(stderr, " MSR 0x%x ", extra_msr_offset); @@ -187,6 +192,15 @@ void dump_list(struct counters *cnt) dump_cnt(cnt); } +/* + * column formatting convention & formats + * package: "pk" 2 columns %2d + * core: "cor" 3 columns %3d + * CPU: "CPU" 3 columns %3d + * GHz: "GHz" 3 columns %3.2 + * TSC: "TSC" 3 columns %3.2 + * percentage " %pc3" %6.2 + */ void print_cnt(struct counters *p) { double interval_float; @@ -196,39 +210,45 @@ void print_cnt(struct counters *p) /* topology columns, print blanks on 1st (average) line */ if (p == cnt_average) { if (show_pkg) + fprintf(stderr, " "); + if (show_pkg && show_core) fprintf(stderr, " "); if (show_core) - fprintf(stderr, " "); + fprintf(stderr, 
" "); if (show_cpu) - fprintf(stderr, " "); + fprintf(stderr, " " " "); } else { if (show_pkg) - fprintf(stderr, "%d", p->pkg); + fprintf(stderr, "%2d", p->pkg); + if (show_pkg && show_core) + fprintf(stderr, " "); if (show_core) - fprintf(stderr, "%4d", p->core); + fprintf(stderr, "%3d", p->core); if (show_cpu) - fprintf(stderr, "%4d", p->cpu); + fprintf(stderr, " %3d", p->cpu); } /* %c0 */ if (do_nhm_cstates) { + if (show_pkg || show_core || show_cpu) + fprintf(stderr, " "); if (!skip_c0) - fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc); + fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); else - fprintf(stderr, " ****"); + fprintf(stderr, " ****"); } /* GHz */ if (has_aperf) { if (!aperf_mperf_unstable) { - fprintf(stderr, "%5.2f", + fprintf(stderr, " %3.2f", 1.0 * p->tsc / units * p->aperf / p->mperf / interval_float); } else { if (p->aperf > p->tsc || p->mperf > p->tsc) { - fprintf(stderr, " ****"); + fprintf(stderr, " ***"); } else { - fprintf(stderr, "%4.1f*", + fprintf(stderr, "%3.1f*", 1.0 * p->tsc / units * p->aperf / p->mperf / interval_float); @@ -241,7 +261,7 @@ void print_cnt(struct counters *p) if (do_nhm_cstates) { if (!skip_c1) - fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); else fprintf(stderr, " ****"); } @@ -252,13 +272,13 @@ void print_cnt(struct counters *p) if (do_snb_cstates) fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); if (extra_msr_offset) fprintf(stderr, " 0x%016llx", p->extra_msr); putc('\n', 
stderr); @@ -267,12 +287,20 @@ void print_cnt(struct counters *p) void print_counters(struct counters *counters) { struct counters *cnt; + static int printed; - print_header(); + + if (!printed || !summary_only) + print_header(); if (num_cpus > 1) print_cnt(cnt_average); + printed = 1; + + if (summary_only) + return; + for (cnt = counters; cnt != NULL; cnt = cnt->next) print_cnt(cnt); @@ -557,7 +585,8 @@ void insert_counters(struct counters **list, return; } - show_cpu = 1; /* there is more than one CPU */ + if (!summary_only) + show_cpu = 1; /* there is more than one CPU */ /* * insert on front of list. @@ -575,13 +604,15 @@ void insert_counters(struct counters **list, while (prev->next && (prev->next->pkg < new->pkg)) { prev = prev->next; - show_pkg = 1; /* there is more than 1 package */ + if (!summary_only) + show_pkg = 1; /* there is more than 1 package */ } while (prev->next && (prev->next->pkg == new->pkg) && (prev->next->core < new->core)) { prev = prev->next; - show_core = 1; /* there is more than 1 core */ + if (!summary_only) + show_core = 1; /* there is more than 1 core */ } while (prev->next && (prev->next->pkg == new->pkg) @@ -1005,8 +1036,11 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+vi:M:")) != -1) { + while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { switch (opt) { + case 's': + summary_only++; + break; case 'v': verbose++; break; From 88c3281f7ba449992f7a33bd2452a8c6fa5503cb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 29 Mar 2012 21:44:40 -0400 Subject: [PATCH 23/47] tools turbostat: reduce measurement overhead due to IPIs turbostat uses /dev/cpu/*/msr interface to read MSRs. For modern systems, it reads 10 MSR/CPU. This can be observed as 10 "Function Call Interrupts" per CPU per sample added to /proc/interrupts. This overhead is measurable on large idle systems, and as Yoquan Song pointed out, it can even trick cpuidle into thinking the system is busy. 
Here turbostat re-schedules itself in-turn to each CPU so that its MSR reads will always be local. This replaces the 10 "Function Call Interrupts" with a single "Rescheduling interrupt" per sample per CPU. On an idle 32-CPU system, this shifts some residency from the shallow c1 state to the deeper c7 state: # ./turbostat.old -s %c0 GHz TSC %c1 %c3 %c6 %c7 %pc2 %pc3 %pc6 %pc7 0.27 1.29 2.29 0.95 0.02 0.00 98.77 20.23 0.00 77.41 0.00 0.25 1.24 2.29 0.98 0.02 0.00 98.75 20.34 0.03 77.74 0.00 0.27 1.22 2.29 0.54 0.00 0.00 99.18 20.64 0.00 77.70 0.00 0.26 1.22 2.29 1.22 0.00 0.00 98.52 20.22 0.00 77.74 0.00 0.26 1.38 2.29 0.78 0.02 0.00 98.95 20.51 0.05 77.56 0.00 ^C # ./turbostat.new -s %c0 GHz TSC %c1 %c3 %c6 %c7 %pc2 %pc3 %pc6 %pc7 0.27 1.20 2.29 0.24 0.01 0.00 99.49 20.58 0.00 78.20 0.00 0.27 1.22 2.29 0.25 0.00 0.00 99.48 20.79 0.00 77.85 0.00 0.27 1.20 2.29 0.25 0.02 0.00 99.46 20.71 0.03 77.89 0.00 0.28 1.26 2.29 0.25 0.01 0.00 99.46 20.89 0.02 77.67 0.00 0.27 1.20 2.29 0.24 0.01 0.00 99.48 20.65 0.00 78.04 0.00 cc: Youquan Song Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6436d54378c7..fa60872b9474 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -19,6 +19,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
*/ +#define _GNU_SOURCE #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #define MSR_TSC 0x10 #define MSR_NEHALEM_PLATFORM_INFO 0xCE @@ -72,6 +74,8 @@ char *progname; int need_reinitialize; int num_cpus; +cpu_set_t *cpu_mask; +size_t cpu_mask_size; struct counters { unsigned long long tsc; /* per thread */ @@ -100,6 +104,40 @@ struct timeval tv_even; struct timeval tv_odd; struct timeval tv_delta; +/* + * cpu_mask_init(ncpus) + * + * allocate and clear cpu_mask + * set cpu_mask_size + */ +void cpu_mask_init(int ncpus) +{ + cpu_mask = CPU_ALLOC(ncpus); + if (cpu_mask == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_mask_size = CPU_ALLOC_SIZE(ncpus); + CPU_ZERO_S(cpu_mask_size, cpu_mask); +} + +void cpu_mask_uninit() +{ + CPU_FREE(cpu_mask); + cpu_mask = NULL; + cpu_mask_size = 0; +} + +int cpu_migrate(int cpu) +{ + CPU_ZERO_S(cpu_mask_size, cpu_mask); + CPU_SET_S(cpu, cpu_mask_size, cpu_mask); + if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) + return -1; + else + return 0; +} + unsigned long long get_msr(int cpu, off_t offset) { ssize_t retval; @@ -471,6 +509,11 @@ void compute_average(struct counters *delta, struct counters *avg) void get_counters(struct counters *cnt) { for ( ; cnt; cnt = cnt->next) { + if (cpu_migrate(cnt->cpu)) { + need_reinitialize = 1; + return; + } + cnt->tsc = get_msr(cnt->cpu, MSR_TSC); if (do_nhm_cstates) cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY); @@ -752,6 +795,8 @@ void re_initialize(void) free_all_counters(); num_cpus = for_all_cpus(alloc_new_counters); need_reinitialize = 0; + cpu_mask_uninit(); + cpu_mask_init(num_cpus); printf("num_cpus is now %d\n", num_cpus); } @@ -984,6 +1029,7 @@ void turbostat_init() check_super_user(); num_cpus = for_all_cpus(alloc_new_counters); + cpu_mask_init(num_cpus); if (verbose) print_nehalem_info(); From 15aaa34654831e98dd76f7738b6c7f5d05a66430 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 29 Mar 2012 22:19:58 -0400 Subject: [PATCH 24/47] 
tools turbostat: harden against cpu online/offline Sometimes users have turbostat running in interval mode when they take processors offline/online. Previously, turbostat would survive, but not gracefully. Tighten up the error checking so turbostat notices changes sooner, and print just 1 line on change: turbostat: re-initialized with num_cpus %d Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 115 ++++++++++++++------------ 1 file changed, 61 insertions(+), 54 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa60872b9474..ab2f682fd44c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -71,7 +71,6 @@ unsigned int show_cpu; int aperf_mperf_unstable; int backwards_count; char *progname; -int need_reinitialize; int num_cpus; cpu_set_t *cpu_mask; @@ -138,30 +137,24 @@ int cpu_migrate(int cpu) return 0; } -unsigned long long get_msr(int cpu, off_t offset) +int get_msr(int cpu, off_t offset, unsigned long long *msr) { ssize_t retval; - unsigned long long msr; char pathname[32]; int fd; sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); - if (fd < 0) { - perror(pathname); - need_reinitialize = 1; - return 0; - } - - retval = pread(fd, &msr, sizeof msr, offset); - if (retval != sizeof msr) { - fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n", - cpu, offset, retval); - exit(-2); - } + if (fd < 0) + return -1; + retval = pread(fd, msr, sizeof *msr, offset); close(fd); - return msr; + + if (retval != sizeof *msr) + return -1; + + return 0; } void print_header(void) @@ -506,36 +499,51 @@ void compute_average(struct counters *delta, struct counters *avg) free(sum); } -void get_counters(struct counters *cnt) +int get_counters(struct counters *cnt) { for ( ; cnt; cnt = cnt->next) { - if (cpu_migrate(cnt->cpu)) { - need_reinitialize = 1; - return; + + if (cpu_migrate(cnt->cpu)) + return -1; + + if (get_msr(cnt->cpu, MSR_TSC, 
&cnt->tsc)) + return -1; + + if (has_aperf) { + if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) + return -1; + if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) + return -1; + } + + if (do_nhm_cstates) { + if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) + return -1; + if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) + return -1; } - cnt->tsc = get_msr(cnt->cpu, MSR_TSC); - if (do_nhm_cstates) - cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY); - if (do_nhm_cstates) - cnt->c6 = get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY); if (do_snb_cstates) - cnt->c7 = get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY); - if (has_aperf) - cnt->aperf = get_msr(cnt->cpu, MSR_APERF); - if (has_aperf) - cnt->mperf = get_msr(cnt->cpu, MSR_MPERF); - if (do_snb_cstates) - cnt->pc2 = get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY); - if (do_nhm_cstates) - cnt->pc3 = get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY); - if (do_nhm_cstates) - cnt->pc6 = get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY); - if (do_snb_cstates) - cnt->pc7 = get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY); + if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) + return -1; + + if (do_nhm_cstates) { + if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) + return -1; + if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) + return -1; + } + if (do_snb_cstates) { + if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) + return -1; + if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) + return -1; + } if (extra_msr_offset) - cnt->extra_msr = get_msr(cnt->cpu, extra_msr_offset); + if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) + return -1; } + return 0; } void print_nehalem_info(void) @@ -546,7 +554,7 @@ void print_nehalem_info(void) if (!do_nehalem_platform_info) return; - msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO); + get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", @@ -562,7 +570,7 @@ void print_nehalem_info(void) if 
(!do_nehalem_turbo_ratio_limit) return; - msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT); + get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); ratio = (msr >> 24) & 0xFF; if (ratio) @@ -755,7 +763,7 @@ int get_core_id(int cpu) } /* - * run func(index, cpu) on every cpu in /proc/stat + * run func(pkg, core, cpu) on every cpu in /proc/stat */ int for_all_cpus(void (func)(int, int, int)) @@ -791,20 +799,18 @@ int for_all_cpus(void (func)(int, int, int)) void re_initialize(void) { - printf("turbostat: topology changed, re-initializing.\n"); free_all_counters(); num_cpus = for_all_cpus(alloc_new_counters); - need_reinitialize = 0; cpu_mask_uninit(); cpu_mask_init(num_cpus); - printf("num_cpus is now %d\n", num_cpus); + printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); } void dummy(int pkg, int core, int cpu) { return; } /* * check to see if a cpu came on-line */ -void verify_num_cpus(void) +int verify_num_cpus(void) { int new_num_cpus; @@ -814,8 +820,9 @@ void verify_num_cpus(void) if (verbose) printf("num_cpus was %d, is now %d\n", num_cpus, new_num_cpus); - need_reinitialize = 1; + return -1; } + return 0; } void turbostat_loop() @@ -825,25 +832,25 @@ restart: gettimeofday(&tv_even, (struct timezone *)NULL); while (1) { - verify_num_cpus(); - if (need_reinitialize) { + if (verify_num_cpus()) { re_initialize(); goto restart; } sleep(interval_sec); - get_counters(cnt_odd); + if (get_counters(cnt_odd)) { + re_initialize(); + goto restart; + } gettimeofday(&tv_odd, (struct timezone *)NULL); - compute_delta(cnt_odd, cnt_even, cnt_delta); timersub(&tv_odd, &tv_even, &tv_delta); compute_average(cnt_delta, cnt_average); print_counters(cnt_delta); - if (need_reinitialize) { + sleep(interval_sec); + if (get_counters(cnt_even)) { re_initialize(); goto restart; } - sleep(interval_sec); - get_counters(cnt_even); gettimeofday(&tv_even, (struct timezone *)NULL); compute_delta(cnt_even, cnt_odd, cnt_delta); timersub(&tv_even, &tv_odd, &tv_delta); From 
d6795fe32da13bde39ea483e42799a22daa730b5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 6 Feb 2012 08:17:08 -0800 Subject: [PATCH 25/47] ACPI: ec: Do request_region outside WARN() WARN() is not supposed to have side effects, so move the request_regions outside. Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- drivers/acpi/ec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index b19a18dd994f..3268dcfbaa9b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -812,10 +812,10 @@ static int acpi_ec_add(struct acpi_device *device) first_ec = ec; device->driver_data = ec; - WARN(!request_region(ec->data_addr, 1, "EC data"), - "Could not request EC data io port 0x%lx", ec->data_addr); - WARN(!request_region(ec->command_addr, 1, "EC cmd"), - "Could not request EC cmd io port 0x%lx", ec->command_addr); + ret = !!request_region(ec->data_addr, 1, "EC data"); + WARN(!ret, "Could not request EC data io port 0x%lx", ec->data_addr); + ret = !!request_region(ec->command_addr, 1, "EC cmd"); + WARN(!ret, "Could not request EC cmd io port 0x%lx", ec->command_addr); pr_info(PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n", ec->gpe, ec->command_addr, ec->data_addr); From 6fe0d0628245fdcd6fad8b837c81e8f7ebc3364d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 6 Feb 2012 08:17:09 -0800 Subject: [PATCH 26/47] ACPI: Make ACPI interrupt threaded Some ACPI interrupt actions may need to wait, and it's easiest to have a thread context for this. So turn the ACPI interrupt into a threaded interrupt. 
Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- drivers/acpi/osl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 412a1e04a922..02367a8a60e9 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -595,7 +595,8 @@ acpi_os_install_interrupt_handler(u32 gsi, acpi_osd_handler handler, acpi_irq_handler = handler; acpi_irq_context = context; - if (request_irq(irq, acpi_irq, IRQF_SHARED, "acpi", acpi_irq)) { + if (request_threaded_irq(irq, NULL, acpi_irq, IRQF_SHARED, "acpi", + acpi_irq)) { printk(KERN_ERR PREFIX "SCI (IRQ%d) allocation failed\n", irq); acpi_irq_handler = NULL; return AE_NOT_ACQUIRED; From 3a53396b0381ec9d5180fd8fe7a681c8ce95fd9a Mon Sep 17 00:00:00 2001 From: ShuoX Liu Date: Wed, 28 Mar 2012 15:19:11 -0700 Subject: [PATCH 27/47] cpuidle: add a sysfs entry to disable specific C state for debug purpose. Some C states of new CPU might be not good. One reason is BIOS might configure them incorrectly. To help developers root cause it quickly, the patch adds a new sysfs entry, so developers could disable specific C state manually. In addition, C state might have much impact on performance tuning, as it takes much time to enter/exit C states, which might delay interrupt processing. With the new debug option, developers could check if a deep C state could impact performance and how much impact it could cause. Also add this option in Documentation/cpuidle/sysfs.txt. 
[akpm@linux-foundation.org: check kstrtol return value] Signed-off-by: ShuoX Liu Reviewed-by: Yanmin Zhang Reviewed-and-Tested-by: Deepthi Dharwar Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- Documentation/cpuidle/sysfs.txt | 5 ++++ drivers/cpuidle/cpuidle.c | 1 + drivers/cpuidle/governors/menu.c | 5 +++- drivers/cpuidle/sysfs.c | 40 ++++++++++++++++++++++++++++++++ include/linux/cpuidle.h | 1 + 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt index 50d7b1642759..9d28a3406e74 100644 --- a/Documentation/cpuidle/sysfs.txt +++ b/Documentation/cpuidle/sysfs.txt @@ -36,6 +36,7 @@ drwxr-xr-x 2 root root 0 Feb 8 10:42 state3 /sys/devices/system/cpu/cpu0/cpuidle/state0: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -45,6 +46,7 @@ total 0 /sys/devices/system/cpu/cpu0/cpuidle/state1: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -54,6 +56,7 @@ total 0 /sys/devices/system/cpu/cpu0/cpuidle/state2: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -63,6 +66,7 @@ total 0 /sys/devices/system/cpu/cpu0/cpuidle/state3: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -72,6 +76,7 @@ total 0 * desc : Small description about 
the idle state (string) +* disable : Option to disable this idle state (bool) * latency : Latency to exit out of this idle state (in microseconds) * name : Name of the idle state (string) * power : Power consumed while in this idle state (in milliwatts) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 4869b5500234..77304b6b8aef 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -245,6 +245,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) state->power_usage = -1; state->flags = 0; state->enter = poll_idle; + state->disable = 0; } #else static void poll_idle_init(struct cpuidle_driver *drv) {} diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index ad0952601ae2..5c17ca112fc2 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -280,7 +280,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * We want to default to C1 (hlt), not to busy polling * unless the timer is happening really really soon. 
*/ - if (data->expected_us > 5) + if (data->expected_us > 5 && + drv->states[CPUIDLE_DRIVER_STATE_START].disable == 0) data->last_state_idx = CPUIDLE_DRIVER_STATE_START; /* @@ -290,6 +291,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; + if (s->disable) + continue; if (s->target_residency > data->predicted_us) continue; if (s->exit_latency > latency_req) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 3fe41fe4851a..88032b4dc6d2 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -222,6 +223,9 @@ struct cpuidle_state_attr { #define define_one_state_ro(_name, show) \ static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL) +#define define_one_state_rw(_name, show, store) \ +static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0644, show, store) + #define define_show_state_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, char *buf) \ @@ -229,6 +233,24 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ return sprintf(buf, "%u\n", state->_name);\ } +#define define_store_state_function(_name) \ +static ssize_t store_state_##_name(struct cpuidle_state *state, \ + const char *buf, size_t size) \ +{ \ + long value; \ + int err; \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + err = kstrtol(buf, 0, &value); \ + if (err) \ + return err; \ + if (value) \ + state->disable = 1; \ + else \ + state->disable = 0; \ + return size; \ +} + #define define_show_state_ull_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, char *buf) \ @@ -251,6 +273,8 @@ define_show_state_ull_function(usage) define_show_state_ull_function(time) 
define_show_state_str_function(name) define_show_state_str_function(desc) +define_show_state_function(disable) +define_store_state_function(disable) define_one_state_ro(name, show_state_name); define_one_state_ro(desc, show_state_desc); @@ -258,6 +282,7 @@ define_one_state_ro(latency, show_state_exit_latency); define_one_state_ro(power, show_state_power_usage); define_one_state_ro(usage, show_state_usage); define_one_state_ro(time, show_state_time); +define_one_state_rw(disable, show_state_disable, store_state_disable); static struct attribute *cpuidle_state_default_attrs[] = { &attr_name.attr, @@ -266,6 +291,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_power.attr, &attr_usage.attr, &attr_time.attr, + &attr_disable.attr, NULL }; @@ -287,8 +313,22 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } +static ssize_t cpuidle_state_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t size) +{ + int ret = -EIO; + struct cpuidle_state *state = kobj_to_state(kobj); + struct cpuidle_state_attr *cattr = attr_to_stateattr(attr); + + if (cattr->store) + ret = cattr->store(state, buf, size); + + return ret; +} + static const struct sysfs_ops cpuidle_state_sysfs_ops = { .show = cpuidle_state_show, + .store = cpuidle_state_store, }; static void cpuidle_state_sysfs_release(struct kobject *kobj) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 927db28a2a4c..ca4e4983773f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -46,6 +46,7 @@ struct cpuidle_state { unsigned int exit_latency; /* in US */ unsigned int power_usage; /* in mW */ unsigned int target_residency; /* in US */ + unsigned int disable; int (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv, From fc850f39ea54c760ce438a601cfea8ab80c4898e Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 26 Mar 2012 14:51:26 +0200 Subject: [PATCH 28/47] cpuidle: use the driver's state_count as default If the 
state_count is not initialized for the device, use the driver's state count as the default. That avoids having to add it manually in the cpuidle driver initialization routine and will save us from duplicated lines of code. Signed-off-by: Daniel Lezcano Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 2 +- drivers/cpuidle/driver.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 77304b6b8aef..f7cab5e9c4d6 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -268,7 +268,7 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (!drv || !cpuidle_curr_governor) return -EIO; if (!dev->state_count) - return -EINVAL; + dev->state_count = drv->state_count; if (dev->registered == 0) { ret = __cpuidle_register_device(dev); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 284d7af5a9c8..40cd3f3024df 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -47,7 +47,7 @@ static void __cpuidle_register_driver(struct cpuidle_driver *drv) */ int cpuidle_register_driver(struct cpuidle_driver *drv) { - if (!drv) + if (!drv || !drv->state_count) return -EINVAL; if (cpuidle_disabled()) From db70b04407a63668e5fee773f76f90367492fd25 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 26 Mar 2012 14:51:27 +0200 Subject: [PATCH 29/47] cpuidle: remove useless array definition in cpuidle_structure All the module names are ro-data; they are never copied to the array. eg. static struct cpuidle_driver intel_idle_driver = { .name = "intel_idle", .owner = THIS_MODULE, }; It is safe to assign the pointer of this ro-data to a const char *. This way we save 12 bytes. 
Signed-off-by: Daniel Lezcano Acked-by: Deepthi Dharwar Tested-by: Deepthi Dharwar Signed-off-by: Len Brown --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index ca4e4983773f..f7f1d9040da3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -120,7 +120,7 @@ static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) ****************************/ struct cpuidle_driver { - char name[CPUIDLE_NAME_LEN]; + const char *name; struct module *owner; unsigned int power_specified:1; From e07510585a88c0f6c6c728e2e006aa913496d4ae Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 26 Mar 2012 14:51:28 +0200 Subject: [PATCH 30/47] cpuidle: remove unused 'governor_data' field As far as I can see, this field is never used in the code. Signed-off-by: Daniel Lezcano Signed-off-by: Len Brown --- include/linux/cpuidle.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index f7f1d9040da3..f3ebbba368b3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -98,7 +98,6 @@ struct cpuidle_device { struct list_head device_list; struct kobject kobj; struct completion kobj_unregister; - void *governor_data; }; DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); From 9bcb8118965ab4631a65ee0726e6518f75cda6c5 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 1 Feb 2012 10:26:54 -0500 Subject: [PATCH 31/47] ACPI: Evaluate thermal trip points before reading temperature An HP laptop (Pavilion G4-1016tx) has the following code in _TMP: Store (\_SB.PCI0.LPCB.EC0.RTMP, Local0) If (LGreaterEqual (Local0, S4TP)) { Store (One, HTS4) } S4TP is initialised at 0 and not programmed further until either _HOT or _CRT is called. 
If we evaluate _TMP before the trip points then HTS4 will always be set, causing the firmware to generate a message on boot complaining that the system shut down because of overheating. The simplest solution is just to reverse the checking of trip points and _TMP in thermal init. Signed-off-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/thermal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 48fbc647b178..7dbebea1ec31 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -941,13 +941,13 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz) if (!tz) return -EINVAL; - /* Get temperature [_TMP] (required) */ - result = acpi_thermal_get_temperature(tz); + /* Get trip points [_CRT, _PSV, etc.] (required) */ + result = acpi_thermal_get_trip_points(tz); if (result) return result; - /* Get trip points [_CRT, _PSV, etc.] (required) */ - result = acpi_thermal_get_trip_points(tz); + /* Get temperature [_TMP] (required) */ + result = acpi_thermal_get_temperature(tz); if (result) return result; From c6436f5a395d346e9f4892d7aeed4c3f99261f0f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Feb 2012 17:04:43 -0700 Subject: [PATCH 32/47] ACPI / PM: print physical addresses consistently with other parts of kernel Print physical address info in a style consistent with the %pR style used elsewhere in the kernel. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/acpi/nvs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nvs.c b/drivers/acpi/nvs.c index 7a2035fa8c71..266bc58ce0ce 100644 --- a/drivers/acpi/nvs.c +++ b/drivers/acpi/nvs.c @@ -95,8 +95,8 @@ static int suspend_nvs_register(unsigned long start, unsigned long size) { struct nvs_page *entry, *next; - pr_info("PM: Registering ACPI NVS region at %lx (%ld bytes)\n", - start, size); + pr_info("PM: Registering ACPI NVS region [mem %#010lx-%#010lx] (%ld bytes)\n", + start, start + size - 1, size); while (size > 0) { unsigned int nr_bytes; From 9f324bda970c599ca35f7be89d9d1bcb96d6053c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Mon, 19 Mar 2012 13:08:02 -0600 Subject: [PATCH 33/47] ACPI: Add CPU hotplug support for processor device objects acpi_processor_install_hotplug_notify() registers processor objects to receive ACPI CPU hotplug event notifications. This patch additionally registers processor device objects (ACPI0007) to receive the notifications as well. 
Signed-off-by: Toshi Kani Reviewed-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/acpi/processor_driver.c | 48 ++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 8ae05ce18500..50be27739fe8 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -68,6 +68,7 @@ #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 +#define ACPI_PROCESSOR_DEVICE_HID "ACPI0007" #define ACPI_PROCESSOR_LIMIT_USER 0 #define ACPI_PROCESSOR_LIMIT_THERMAL 1 @@ -88,7 +89,7 @@ static int acpi_processor_start(struct acpi_processor *pr); static const struct acpi_device_id processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, 0}, - {"ACPI0007", 0}, + {ACPI_PROCESSOR_DEVICE_HID, 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, processor_device_ids); @@ -741,20 +742,46 @@ static void acpi_processor_hotplug_notify(acpi_handle handle, return; } +static acpi_status is_processor_device(acpi_handle handle) +{ + struct acpi_device_info *info; + char *hid; + acpi_status status; + + status = acpi_get_object_info(handle, &info); + if (ACPI_FAILURE(status)) + return status; + + if (info->type == ACPI_TYPE_PROCESSOR) { + kfree(info); + return AE_OK; /* found a processor object */ + } + + if (!(info->valid & ACPI_VALID_HID)) { + kfree(info); + return AE_ERROR; + } + + hid = info->hardware_id.string; + if ((hid == NULL) || strcmp(hid, ACPI_PROCESSOR_DEVICE_HID)) { + kfree(info); + return AE_ERROR; + } + + kfree(info); + return AE_OK; /* found a processor device object */ +} + static acpi_status processor_walk_namespace_cb(acpi_handle handle, u32 lvl, void *context, void **rv) { acpi_status status; int *action = context; - acpi_object_type type = 0; - status = acpi_get_type(handle, &type); + status = is_processor_device(handle); if (ACPI_FAILURE(status)) - return (AE_OK); - - if (type 
!= ACPI_TYPE_PROCESSOR) - return (AE_OK); + return AE_OK; /* not a processor; continue to walk */ switch (*action) { case INSTALL_NOTIFY_HANDLER: @@ -772,7 +799,8 @@ processor_walk_namespace_cb(acpi_handle handle, break; } - return (AE_OK); + /* found a processor; skip walking underneath */ + return AE_CTRL_DEPTH; } static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr) @@ -830,7 +858,7 @@ void acpi_processor_install_hotplug_notify(void) { #ifdef CONFIG_ACPI_HOTPLUG_CPU int action = INSTALL_NOTIFY_HANDLER; - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, + acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, processor_walk_namespace_cb, NULL, &action, NULL); @@ -843,7 +871,7 @@ void acpi_processor_uninstall_hotplug_notify(void) { #ifdef CONFIG_ACPI_HOTPLUG_CPU int action = UNINSTALL_NOTIFY_HANDLER; - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, + acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, processor_walk_namespace_cb, NULL, &action, NULL); From 1a022e3f1be11730bd8747b1af96a0274bf6356e Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 13 Mar 2012 19:55:09 +0100 Subject: [PATCH 34/47] idle, x86: Allow off-lined CPU to enter deeper C states Currently when a CPU is off-lined it enters either MWAIT-based idle or, if MWAIT is not desired or supported, HLT-based idle (which places the processor in C1 state). This patch allows processors without MWAIT support to stay in states deeper than C1. 
Signed-off-by: Boris Ostrovsky Signed-off-by: Len Brown --- arch/x86/kernel/smpboot.c | 4 +++- drivers/acpi/processor_idle.c | 31 +++++++++++++++++++++++++++++++ drivers/cpuidle/cpuidle.c | 28 ++++++++++++++++++++++++++++ include/linux/cpuidle.h | 5 +++++ 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 66d250c00d11..93a2a0932b51 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -1422,7 +1423,8 @@ void native_play_dead(void) tboot_shutdown(TB_SHUTDOWN_WFS); mwait_play_dead(); /* Only returns on failure */ - hlt_play_dead(); + if (cpuidle_play_dead()) + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0e8e2de2ed3e..6b1d32a161ae 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -770,6 +770,35 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, return index; } + +/** + * acpi_idle_play_dead - enters an ACPI state for long-term idle (i.e. 
off-lining) + * @dev: the target CPU + * @index: the index of suggested state + */ +static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) +{ + struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + + ACPI_FLUSH_CPU_CACHE(); + + while (1) { + + if (cx->entry_method == ACPI_CSTATE_HALT) + halt(); + else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { + inb(cx->address); + /* See comment in acpi_idle_do_entry() */ + inl(acpi_gbl_FADT.xpm_timer_block.address); + } else + return -ENODEV; + } + + /* Never reached */ + return 0; +} + /** * acpi_idle_enter_simple - enters an ACPI state without BM handling * @dev: the target CPU @@ -1077,12 +1106,14 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_c1; + state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; case ACPI_STATE_C2: state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_simple; + state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index f7cab5e9c4d6..3e146b2ada4a 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -71,6 +71,34 @@ typedef int (*cpuidle_enter_t)(struct cpuidle_device *dev, static cpuidle_enter_t cpuidle_enter_ops; +/** + * cpuidle_play_dead - cpu off-lining + * + * Only returns in case of an error + */ +int cpuidle_play_dead(void) +{ + struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_driver *drv = cpuidle_get_driver(); + int i, dead_state = -1; + int power_usage = -1; + + /* Find lowest-power state that supports long-term idle */ + for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + + if (s->power_usage < power_usage && s->enter_dead) { + power_usage = 
s->power_usage; + dead_state = i; + } + } + + if (dead_state != -1) + return drv->states[dead_state].enter_dead(dev, dead_state); + + return -ENODEV; +} + /** * cpuidle_idle_call - the main idle loop * diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index f3ebbba368b3..d557bcd0ada7 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -51,6 +51,8 @@ struct cpuidle_state { int (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); + + int (*enter_dead) (struct cpuidle_device *dev, int index); }; /* Idle State Flags */ @@ -147,6 +149,8 @@ extern int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index, int (*enter)(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)); +extern int cpuidle_play_dead(void); + #else static inline void disable_cpuidle(void) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } @@ -168,6 +172,7 @@ static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, int (*enter)(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)) { return -ENODEV; } +static inline int cpuidle_play_dead(void) {return -ENODEV; } #endif From 02401c06b7f6bec65f314e3cec7894502c973501 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 13 Mar 2012 19:55:10 +0100 Subject: [PATCH 35/47] cpuidle: power_usage should be declared signed integer power_usage is always assigned a negative value and should be declared a signed integer Signed-off-by: Boris Ostrovsky Signed-off-by: Len Brown --- drivers/cpuidle/governors/menu.c | 2 +- include/linux/cpuidle.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 5c17ca112fc2..06335756ea14 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -236,7 +236,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = 
&__get_cpu_var(menu_devices); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); - unsigned int power_usage = -1; + int power_usage = -1; int i; int multiplier; struct timespec t; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d557bcd0ada7..6c26a3da0e03 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -44,7 +44,7 @@ struct cpuidle_state { unsigned int flags; unsigned int exit_latency; /* in US */ - unsigned int power_usage; /* in mW */ + int power_usage; /* in mW */ unsigned int target_residency; /* in US */ unsigned int disable; From c80f5b31f3c55a197f5323b93d1e3553429a427e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 15 Mar 2012 09:32:05 +0100 Subject: [PATCH 36/47] ACPI: processor_driver: add missing kfree The function acpi_processor_add is stored in the ops.add field of a acpi_driver structure. This function is then called in acpi_bus_driver_init. On failure, this function clears the field device->driver_data, but does not free its contents. Thus the free has to be done by the add function. In acpi_processor_add, the corresponding value is pr. This value is currently freed on failure before storing it in device->driver_data, but not after. This free is added in the error handling code at the end of the function. The per_cpu variable processors is also cleared so that it does not refer to a dangling pointer. Signed-off-by: Julia Lawall Reviewed-by: Srivatsa S. 
Bhat Acked-by: Deepthi Dharwar Signed-off-by: Len Brown --- drivers/acpi/processor_driver.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 8ae05ce18500..fce0066aa4a4 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -535,8 +535,8 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) return -ENOMEM; if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { - kfree(pr); - return -ENOMEM; + result = -ENOMEM; + goto err_free_pr; } pr->handle = device->handle; @@ -576,7 +576,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) dev = get_cpu_device(pr->id); if (sysfs_create_link(&device->dev.kobj, &dev->kobj, "sysdev")) { result = -EFAULT; - goto err_free_cpumask; + goto err_clear_processor; } /* @@ -594,9 +594,15 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) err_remove_sysfs: sysfs_remove_link(&device->dev.kobj, "sysdev"); +err_clear_processor: + /* + * processor_device_array is not cleared to allow checks for buggy BIOS + */ + per_cpu(processors, pr->id) = NULL; err_free_cpumask: free_cpumask_var(pr->throttling.shared_cpu_map); - +err_free_pr: + kfree(pr); return result; } From ac909ec308ce8d5177963c780564824d12bc3fa2 Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Thu, 8 Mar 2012 13:33:24 -0800 Subject: [PATCH 37/47] ACPI: Fix use-after-free in acpi_map_lsapic When processor is being hot-added to the system, acpi_map_lsapic invokes ACPI _MAT method to find APIC ID and flags, verifies that returned structure is indeed ACPI's local APIC structure, and that flags contain MADT_ENABLED bit. Then saves APIC ID, frees structure - and accesses structure when computing arguments for acpi_register_lapic call. 
Which sometimes leads to acpi_register_lapic call being made with second argument zero, failing to bring processor online with error 'Unable to map lapic to logical cpu number'. As lapic->lapic_flags & ACPI_MADT_ENABLED was already confirmed to be non-zero a few lines above, we can just pass unconditional ACPI_MADT_ENABLED to the acpi_register_lapic. Signed-off-by: Petr Vandrovec Signed-off-by: Alok N Kataria Reviewed-by: Toshi Kani Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ce664f33ea8e..bbcc2c389ade 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -642,6 +642,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) kfree(buffer.pointer); buffer.length = ACPI_ALLOCATE_BUFFER; buffer.pointer = NULL; + lapic = NULL; if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) goto out; @@ -650,7 +651,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) goto free_tmp_map; cpumask_copy(tmp_map, cpu_present_mask); - acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); + acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* * If mp_register_lapic successfully generates a new logical cpu From 89e96ada572fb216e582dbe3f64e1a6939a37f74 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Mar 2012 13:29:20 -0800 Subject: [PATCH 38/47] PNPACPI: Fix device ref leaking in acpi_pnp_match During testing pci root bus removal, found some root bus bridge is not freed. If booting with pnpacpi=off, those hostbridge could be freed without problem. It turns out that some device references are not released during acpi_pnp_match; that match should not hold a device ref on every call. Add a put_device call before returning.
Signed-off-by: Yinghai Lu Cc: stable@vger.kernel.org Signed-off-by: Len Brown --- drivers/pnp/pnpacpi/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index b00c17612a89..d21e8f59c84e 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -321,9 +321,14 @@ static int __init acpi_pnp_match(struct device *dev, void *_pnp) { struct acpi_device *acpi = to_acpi_device(dev); struct pnp_dev *pnp = _pnp; + struct device *physical_device; + + physical_device = acpi_get_physical_device(acpi->handle); + if (physical_device) + put_device(physical_device); /* true means it matched */ - return !acpi_get_physical_device(acpi->handle) + return !physical_device && compare_pnp_id(pnp->id, acpi_device_hid(acpi)); } From e252675fb722d4a307cc380a06a905f03cf9951c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 24 Feb 2012 11:41:53 +0000 Subject: [PATCH 39/47] ACPI: consistently use should_use_kmap() ... so that acpi_unmap()'s behavior gets in sync with acpi_map()'s. Signed-off-by: Jan Beulich Signed-off-by: Len Brown --- drivers/acpi/osl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 412a1e04a922..5aef087d42d0 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -347,7 +347,7 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) unsigned long pfn; pfn = pg_off >> PAGE_SHIFT; - if (page_is_ram(pfn)) + if (should_use_kmap(pfn)) kunmap(pfn_to_page(pfn)); else iounmap(vaddr); From 9505626d7bfeb5bd4b85acb483831ac640b2a5e8 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 28 Feb 2012 13:27:44 -0800 Subject: [PATCH 40/47] ACPI: Fix unprotected smp_processor_id() in acpi_processor_cst_has_changed() The acpi_processor_cst_has_changed() function is invoked from a CPU_ONLINE or CPU_DEAD function, which might well execute on CPU 0 even though the CPU being hotplugged is some other CPU. In addition, acpi_processor_cst_has_changed() invokes smp_processor_id() without protection, resulting in splats when onlining CPUs. This commit therefore changes the smp_processor_id() to pr->id, as is used elsewhere in the code, for example, in acpi_processor_add(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Srivatsa S. Bhat Tested-by: Yong Zhang Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0e8e2de2ed3e..9e57b06d1f24 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1159,8 +1159,7 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) * to make the code that updates C-States be called once. */ - if (smp_processor_id() == 0 && - cpuidle_get_driver() == &acpi_idle_driver) { + if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) { cpuidle_pause_and_lock(); /* Protect against cpu-hotplug */ From 37239978778806ecba54da60676abb46870acebb Mon Sep 17 00:00:00 2001 From: Alex He Date: Tue, 21 Feb 2012 16:58:10 +0800 Subject: [PATCH 41/47] ACPI: Clean redundant codes in scan.c Clean up the redundant code in acpi_bus_get_power_flags().
Signed-off-by: Alex He Signed-off-by: Len Brown --- drivers/acpi/scan.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 8ab80bafe3f1..5d24a17aa854 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -880,7 +880,6 @@ static int acpi_bus_get_power_flags(struct acpi_device *device) int j; device->power.flags.power_resources = 1; - ps->flags.valid = 1; for (j = 0; j < ps->resources.count; j++) acpi_bus_add_power_resource(ps->resources.handles[j]); } @@ -888,10 +887,8 @@ static int acpi_bus_get_power_flags(struct acpi_device *device) /* Evaluate "_PSx" to see if we can do explicit sets */ object_name[2] = 'S'; status = acpi_get_handle(device->handle, object_name, &handle); - if (ACPI_SUCCESS(status)) { + if (ACPI_SUCCESS(status)) ps->flags.explicit_set = 1; - ps->flags.valid = 1; - } /* State is valid if we have some power control */ if (ps->resources.count || ps->flags.explicit_set) From f2d4753fbd4d15c65d6ba48167aa83916ddbe518 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 31 Jan 2012 13:19:19 -0500 Subject: [PATCH 42/47] ACPI: export acpi_kobj Drivers may wish to add entries to /sys/firmware/acpi, so export acpi_kobj in order to let them do that. Signed-off-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 9ecec98bc76e..3263b68cdfa3 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1010,6 +1010,7 @@ static int __init acpi_bus_init(void) } struct kobject *acpi_kobj; +EXPORT_SYMBOL_GPL(acpi_kobj); static int __init acpi_init(void) { From d1ff4b1cdbabb9ab9813f3d6e1cbec42cc5d6ed8 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 31 Jan 2012 13:19:20 -0500 Subject: [PATCH 43/47] ACPI: Add support for exposing BGRT data ACPI 5.0 adds the BGRT, a table that contains a pointer to the firmware boot splash and associated metadata. 
This simple driver exposes it via /sys/firmware/acpi in order to allow bootsplash applications to draw their splash around the firmware image and reduce the number of jarring graphical transitions during boot. Signed-off-by: Matthew Garrett Signed-off-by: Len Brown --- Documentation/ABI/testing/sysfs-firmware-acpi | 20 ++ drivers/acpi/Kconfig | 9 + drivers/acpi/Makefile | 1 + drivers/acpi/bgrt.c | 175 ++++++++++++++++++ 4 files changed, 205 insertions(+) create mode 100644 drivers/acpi/bgrt.c diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index 4f9ba3c2fca7..dd930c8db41f 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -1,3 +1,23 @@ +What: /sys/firmware/acpi/bgrt/ +Date: January 2012 +Contact: Matthew Garrett +Description: + The BGRT is an ACPI 5.0 feature that allows the OS + to obtain a copy of the firmware boot splash and + some associated metadata. This is intended to be used + by boot splash applications in order to interact with + the firmware boot splash in order to avoid jarring + transitions. + + image: The image bitmap. Currently a 32-bit BMP. + status: 1 if the image is valid, 0 if firmware invalidated it. + type: 0 indicates image is in BMP format. + version: The version of the BGRT. Currently 1. + xoffset: The number of pixels between the left of the screen + and the left edge of the image. + yoffset: The number of pixels between the top of the screen + and the top edge of the image. + What: /sys/firmware/acpi/interrupts/ Date: February 2008 Contact: Len Brown diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 7556913aba45..47768ff87343 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -384,6 +384,15 @@ config ACPI_CUSTOM_METHOD load additional kernel modules after boot, this feature may be used to override that restriction). 
+config ACPI_BGRT + tristate "Boottime Graphics Resource Table support" + default n + help + This driver adds support for exposing the ACPI Boottime Graphics + Resource Table, which allows the operating system to obtain + data from the firmware boot splash. It will appear under + /sys/firmware/acpi/bgrt/ . + source "drivers/acpi/apei/Kconfig" endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 1567028d2038..47199e2a9130 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_ACPI_SBS) += sbs.o obj-$(CONFIG_ACPI_HED) += hed.o obj-$(CONFIG_ACPI_EC_DEBUGFS) += ec_sys.o obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o +obj-$(CONFIG_ACPI_BGRT) += bgrt.o # processor has its own "processor." module_param namespace processor-y := processor_driver.o processor_throttling.o diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c new file mode 100644 index 000000000000..8cf6c46e99fb --- /dev/null +++ b/drivers/acpi/bgrt.c @@ -0,0 +1,175 @@ +/* + * Copyright 2012 Red Hat, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static struct acpi_table_bgrt *bgrt_tab; +static struct kobject *bgrt_kobj; + +struct bmp_header { + u16 id; + u32 size; +} __attribute ((packed)); + +static struct bmp_header bmp_header; + +static ssize_t show_version(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->version); +} +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); + +static ssize_t show_status(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->status); +} +static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); + +static ssize_t show_type(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->image_type); +} +static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); + +static ssize_t show_xoffset(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->image_offset_x); +} +static DEVICE_ATTR(xoffset, S_IRUGO, show_xoffset, NULL); + +static ssize_t show_yoffset(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->image_offset_y); +} +static DEVICE_ATTR(yoffset, S_IRUGO, show_yoffset, NULL); + +static ssize_t show_image(struct file *file, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) +{ + int size = attr->size; + void __iomem *image = attr->private; + + if (off >= size) { + count = 0; + } else { + if (off + count > size) + count = size - off; + + memcpy_fromio(buf, image+off, count); + } + + return count; +} + +static struct bin_attribute image_attr = { + .attr = { + .name = "image", + .mode = S_IRUGO, + }, + .read = show_image, +}; + +static struct attribute *bgrt_attributes[] = { + &dev_attr_version.attr, + &dev_attr_status.attr, + &dev_attr_type.attr, 
+ &dev_attr_xoffset.attr, + &dev_attr_yoffset.attr, + NULL, +}; + +static struct attribute_group bgrt_attribute_group = { + .attrs = bgrt_attributes, +}; + +static int __init bgrt_init(void) +{ + acpi_status status; + int ret; + void __iomem *bgrt; + + if (acpi_disabled) + return -ENODEV; + + status = acpi_get_table("BGRT", 0, + (struct acpi_table_header **)&bgrt_tab); + + if (ACPI_FAILURE(status)) + return -ENODEV; + + sysfs_bin_attr_init(&image_attr); + + bgrt = ioremap(bgrt_tab->image_address, sizeof(struct bmp_header)); + + if (!bgrt) { + ret = -EINVAL; + goto out_err; + } + + memcpy_fromio(&bmp_header, bgrt, sizeof(bmp_header)); + image_attr.size = bmp_header.size; + iounmap(bgrt); + + image_attr.private = ioremap(bgrt_tab->image_address, image_attr.size); + + if (!image_attr.private) { + ret = -EINVAL; + goto out_err; + } + + + bgrt_kobj = kobject_create_and_add("bgrt", acpi_kobj); + if (!bgrt_kobj) { + ret = -EINVAL; + goto out_iounmap; + } + + ret = sysfs_create_group(bgrt_kobj, &bgrt_attribute_group); + if (ret) + goto out_kobject; + + ret = sysfs_create_bin_file(bgrt_kobj, &image_attr); + if (ret) + goto out_group; + + return 0; + +out_group: + sysfs_remove_group(bgrt_kobj, &bgrt_attribute_group); +out_kobject: + kobject_put(bgrt_kobj); +out_iounmap: + iounmap(image_attr.private); +out_err: + return ret; +} + +static void __exit bgrt_exit(void) +{ + iounmap(image_attr.private); + sysfs_remove_group(bgrt_kobj, &bgrt_attribute_group); + sysfs_remove_bin_file(bgrt_kobj, &image_attr); +} + +module_init(bgrt_init); +module_exit(bgrt_exit); + +MODULE_AUTHOR("Matthew Garrett"); +MODULE_DESCRIPTION("BGRT boot graphic support"); +MODULE_LICENSE("GPL"); From ea9f8856bd6d4ed45885b06a338f7362cd6c60e5 Mon Sep 17 00:00:00 2001 From: Igor Murzov Date: Fri, 30 Mar 2012 21:32:08 +0400 Subject: [PATCH 44/47] ACPI video: Harden video bus adding. It is always better to check return values, so add some new checks and correct existing ones. 
v2: Be consistent and don't mix errors from -E* and AE_* namespaces. Signed-off-by: Igor Murzov Signed-off-by: Len Brown --- drivers/acpi/video.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index eaef02afc7cf..462486b9f9b2 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -548,27 +548,27 @@ acpi_video_device_EDID(struct acpi_video_device *device, * 1. The system BIOS should NOT automatically control the brightness * level of the LCD when the power changes from AC to DC. * Return Value: - * -1 wrong arg. + * -EINVAL wrong arg. */ static int acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag) { - u64 status = 0; + acpi_status status; union acpi_object arg0 = { ACPI_TYPE_INTEGER }; struct acpi_object_list args = { 1, &arg0 }; - if (bios_flag < 0 || bios_flag > 3 || lcd_flag < 0 || lcd_flag > 1) { - status = -1; - goto Failed; - } + if (bios_flag < 0 || bios_flag > 3 || lcd_flag < 0 || lcd_flag > 1) + return -EINVAL; arg0.integer.value = (lcd_flag << 2) | bios_flag; video->dos_setting = arg0.integer.value; - acpi_evaluate_object(video->device->handle, "_DOS", &args, NULL); + status = acpi_evaluate_object(video->device->handle, "_DOS", + &args, NULL); + if (ACPI_FAILURE(status)) + return -EIO; - Failed: - return status; + return 0; } /* @@ -1343,15 +1343,17 @@ static int acpi_video_bus_get_devices(struct acpi_video_bus *video, struct acpi_device *device) { - int status = 0; + int status; struct acpi_device *dev; - acpi_video_device_enumerate(video); + status = acpi_video_device_enumerate(video); + if (status) + return status; list_for_each_entry(dev, &device->children, node) { status = acpi_video_bus_get_one_device(dev, video); - if (ACPI_FAILURE(status)) { + if (status) { printk(KERN_WARNING PREFIX "Can't attach device\n"); continue; @@ -1653,8 +1655,12 @@ static int acpi_video_bus_add(struct acpi_device *device) 
mutex_init(&video->device_list_lock); INIT_LIST_HEAD(&video->video_device_list); - acpi_video_bus_get_devices(video, device); - acpi_video_bus_start_devices(video); + error = acpi_video_bus_get_devices(video, device); + if (error) + goto err_free_video; + error = acpi_video_bus_start_devices(video); + if (error) + goto err_put_video; video->input = input = input_allocate_device(); if (!input) { @@ -1692,14 +1698,19 @@ static int acpi_video_bus_add(struct acpi_device *device) video->pm_nb.notifier_call = acpi_video_resume; video->pm_nb.priority = 0; - register_pm_notifier(&video->pm_nb); + error = register_pm_notifier(&video->pm_nb); + if (error) + goto err_unregister_input_dev; return 0; + err_unregister_input_dev: + input_unregister_device(input); err_free_input_dev: input_free_device(input); err_stop_video: acpi_video_bus_stop_devices(video); + err_put_video: acpi_video_bus_put_devices(video); kfree(video->attached_array); err_free_video: From b60e7f6166857c76871977794fa266b02da1f394 Mon Sep 17 00:00:00 2001 From: Igor Murzov Date: Fri, 30 Mar 2012 21:32:09 +0400 Subject: [PATCH 45/47] ACPI video: Don't start video device until its associated input device has been allocated Quoth Dmitry Torokhov: In addition to bus notifier we do install device notifier explicitly so it might fire up early. The easiest fix would be to move acpi_video_bus_start_devices() after input_allocate_device() but before input_register_device() - unregistered input devices can handle input_event() calls just fine.
May fix crashes reported in: https://bugzilla.kernel.org/show_bug.cgi?id=40672 Signed-off-by: Igor Murzov Signed-off-by: Len Brown --- drivers/acpi/video.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 462486b9f9b2..9577b6fa2650 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1658,16 +1658,17 @@ static int acpi_video_bus_add(struct acpi_device *device) error = acpi_video_bus_get_devices(video, device); if (error) goto err_free_video; - error = acpi_video_bus_start_devices(video); - if (error) - goto err_put_video; video->input = input = input_allocate_device(); if (!input) { error = -ENOMEM; - goto err_stop_video; + goto err_put_video; } + error = acpi_video_bus_start_devices(video); + if (error) + goto err_free_input_dev; + snprintf(video->phys, sizeof(video->phys), "%s/video/input0", acpi_device_hid(video->device)); @@ -1688,7 +1689,7 @@ static int acpi_video_bus_add(struct acpi_device *device) error = input_register_device(input); if (error) - goto err_free_input_dev; + goto err_stop_video; printk(KERN_INFO PREFIX "%s [%s] (multi-head: %s rom: %s post: %s)\n", ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device), @@ -1706,10 +1707,10 @@ static int acpi_video_bus_add(struct acpi_device *device) err_unregister_input_dev: input_unregister_device(input); - err_free_input_dev: - input_free_device(input); err_stop_video: acpi_video_bus_stop_devices(video); + err_free_input_dev: + input_free_device(input); err_put_video: acpi_video_bus_put_devices(video); kfree(video->attached_array); From c264c651fd318274ffe27219947f17f24f07c073 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 6 Feb 2012 08:17:12 -0800 Subject: [PATCH 46/47] Disable MCP limit exceeded messages from Intel IPS driver On a system on the thermal limit these are quite noisy and flood the logs. Better would be a counter anyways. 
But given that we don't even have anything for normal throttling this doesn't seem to be urgent either. Reviewed-by: Jesse Barnes Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- drivers/platform/x86/intel_ips.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 88a98cff5a44..f7ba316e0ed6 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -609,25 +609,16 @@ static bool mcp_exceeded(struct ips_driver *ips) bool ret = false; u32 temp_limit; u32 avg_power; - const char *msg = "MCP limit exceeded: "; spin_lock_irqsave(&ips->turbo_status_lock, flags); temp_limit = ips->mcp_temp_limit * 100; - if (ips->mcp_avg_temp > temp_limit) { - dev_info(&ips->dev->dev, - "%sAvg temp %u, limit %u\n", msg, ips->mcp_avg_temp, - temp_limit); + if (ips->mcp_avg_temp > temp_limit) ret = true; - } avg_power = ips->cpu_avg_power + ips->mch_avg_power; - if (avg_power > ips->mcp_power_limit) { - dev_info(&ips->dev->dev, - "%sAvg power %u, limit %u\n", msg, avg_power, - ips->mcp_power_limit); + if (avg_power > ips->mcp_power_limit) ret = true; - } spin_unlock_irqrestore(&ips->turbo_status_lock, flags); From 344e222edf486bf42da1ced137e36df7a345b0ad Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Mar 2012 14:57:36 +0300 Subject: [PATCH 47/47] ACPI throttling: fix endian bug in acpi_read_throttling_status() Using a u64 here creates an endian bug. We store a u32 number in the top byte which is a larger number than intended on big endian systems. There is no reason to use a 64 bit data type here, I guess it was just an oversight. I removed the initialization to zero as well. It's needed with a u64 but with a u32, the variable gets initialized properly inside the call to acpi_os_read_port(). 
Signed-off-by: Dan Carpenter Signed-off-by: Len Brown --- drivers/acpi/processor_throttling.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 605a2954ef17..1d02b7b5ade0 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -769,7 +769,7 @@ static int acpi_read_throttling_status(struct acpi_processor *pr, u64 *value) { u32 bit_width, bit_offset; - u64 ptc_value; + u32 ptc_value; u64 ptc_mask; struct acpi_processor_throttling *throttling; int ret = -1; @@ -777,12 +777,11 @@ static int acpi_read_throttling_status(struct acpi_processor *pr, throttling = &pr->throttling; switch (throttling->status_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: - ptc_value = 0; bit_width = throttling->status_register.bit_width; bit_offset = throttling->status_register.bit_offset; acpi_os_read_port((acpi_io_address) throttling->status_register. - address, (u32 *) &ptc_value, + address, &ptc_value, (u32) (bit_width + bit_offset)); ptc_mask = (1 << bit_width) - 1; *value = (u64) ((ptc_value >> bit_offset) & ptc_mask);