diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index b045fe54986a..14f4e6336d88 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -26,6 +26,7 @@ Contents: 1.4 target/target_index or setpolicy? 1.5 target/target_index 1.6 setpolicy +1.7 get_intermediate and target_intermediate 2. Frequency Table Helpers @@ -79,6 +80,10 @@ cpufreq_driver.attr - A pointer to a NULL-terminated list of "struct freq_attr" which allow to export values to sysfs. +cpufreq_driver.get_intermediate +and target_intermediate Used to switch to stable frequency while + changing CPU frequency. + 1.2 Per-CPU Initialization -------------------------- @@ -151,7 +156,7 @@ Some cpufreq-capable processors switch the frequency between certain limits on their own. These shall use the ->setpolicy call -1.4. target/target_index +1.5. target/target_index ------------- The target_index call has two arguments: struct cpufreq_policy *policy, @@ -160,6 +165,9 @@ and unsigned int index (into the exposed frequency table). The CPUfreq driver must set the new frequency when called here. The actual frequency must be determined by freq_table[index].frequency. +It should always restore to earlier frequency (i.e. policy->restore_freq) in +case of errors, even if we switched to intermediate frequency earlier. + Deprecated: ---------- The target call has three arguments: struct cpufreq_policy *policy, @@ -179,7 +187,7 @@ Here again the frequency table helper might assist you - see section 2 for details. -1.5 setpolicy +1.6 setpolicy --------------- The setpolicy call only takes a struct cpufreq_policy *policy as @@ -190,6 +198,23 @@ setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check the reference implementation in drivers/cpufreq/longrun.c +1.7 get_intermediate and target_intermediate +-------------------------------------------- + +Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset. + +get_intermediate should return a stable intermediate frequency platform wants to +switch to, and target_intermediate() should set CPU to to that frequency, before +jumping to the frequency corresponding to 'index'. Core will take care of +sending notifications and driver doesn't have to handle them in +target_intermediate() or target_index(). + +Drivers can return '0' from get_intermediate() in case they don't wish to switch +to intermediate frequency for some target frequency. In that case core will +directly call ->target_index(). + +NOTE: ->target_index() should restore to policy->restore_freq in case of +failures as core would send notifications for that. 2. Frequency Table Helpers diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index e13dafc8e8f1..2850df3bf957 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -1,6 +1,6 @@ Interaction of Suspend code (S3) with the CPU hotplug infrastructure - (C) 2011 Srivatsa S. Bhat + (C) 2011 - 2014 Srivatsa S. Bhat I. How does the regular CPU hotplug code differ from how the Suspend-to-RAM diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 147bc1b91b42..3f2bdc812d23 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1810,6 +1810,16 @@ acpi_status __init acpi_os_initialize(void) acpi_os_map_generic_address(&acpi_gbl_FADT.xpm1b_event_block); acpi_os_map_generic_address(&acpi_gbl_FADT.xgpe0_block); acpi_os_map_generic_address(&acpi_gbl_FADT.xgpe1_block); + if (acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) { + /* + * Use acpi_os_map_generic_address to pre-map the reset + * register if it's in system memory. + */ + int rv; + + rv = acpi_os_map_generic_address(&acpi_gbl_FADT.reset_register); + pr_debug(PREFIX "%s: map reset_reg status %d\n", __func__, rv); + } return AE_OK; } @@ -1838,6 +1848,8 @@ acpi_status acpi_os_terminate(void) acpi_os_unmap_generic_address(&acpi_gbl_FADT.xgpe0_block); acpi_os_unmap_generic_address(&acpi_gbl_FADT.xpm1b_event_block); acpi_os_unmap_generic_address(&acpi_gbl_FADT.xpm1a_event_block); + if (acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) + acpi_os_unmap_generic_address(&acpi_gbl_FADT.reset_register); destroy_workqueue(kacpid_wq); destroy_workqueue(kacpi_notify_wq); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index c11e3795431b..b3e3cc73ba79 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "internal.h" #include "sleep.h" @@ -501,6 +502,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state) ACPI_FLUSH_CPU_CACHE(); + trace_suspend_resume(TPS("acpi_suspend"), acpi_state, true); switch (acpi_state) { case ACPI_STATE_S1: barrier(); @@ -516,6 +518,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state) pr_info(PREFIX "Low-level resume complete\n"); break; } + trace_suspend_resume(TPS("acpi_suspend"), acpi_state, false); /* This violates the spec but is required for bug compatibility. */ acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1); diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 101fb090dcb9..fb9ffe9adc64 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -82,7 +82,7 @@ module_param(allow_duplicates, bool, 0644); * For Windows 8 systems: used to decide if video module * should skip registering backlight interface of its own. */ -static int use_native_backlight_param = -1; +static int use_native_backlight_param = 1; module_param_named(use_native_backlight, use_native_backlight_param, int, 0444); static bool use_native_backlight_dmi = false; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 343ffad59377..bf412961a934 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -214,9 +214,6 @@ static void initcall_debug_report(struct device *dev, ktime_t calltime, pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev), error, (unsigned long long)nsecs >> 10); } - - trace_device_pm_report_time(dev, info, nsecs, pm_verb(state.event), - error); } /** @@ -387,7 +384,9 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, calltime = initcall_debug_start(dev); pm_dev_dbg(dev, state, info); + trace_device_pm_callback_start(dev, info, state.event); error = cb(dev); + trace_device_pm_callback_end(dev, error); suspend_report_result(cb, error); initcall_debug_report(dev, calltime, error, state, info); @@ -545,6 +544,7 @@ static void dpm_resume_noirq(pm_message_t state) struct device *dev; ktime_t starttime = ktime_get(); + trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true); mutex_lock(&dpm_list_mtx); pm_transition = state; @@ -587,6 +587,7 @@ static void dpm_resume_noirq(pm_message_t state) dpm_show_time(starttime, state, "noirq"); resume_device_irqs(); cpuidle_resume(); + trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); } /** @@ -664,6 +665,7 @@ static void dpm_resume_early(pm_message_t state) struct device *dev; ktime_t starttime = ktime_get(); + trace_suspend_resume(TPS("dpm_resume_early"), state.event, true); mutex_lock(&dpm_list_mtx); pm_transition = state; @@ -703,6 +705,7 @@ static void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, "early"); + trace_suspend_resume(TPS("dpm_resume_early"), state.event, false); } /** @@ -834,6 +837,7 @@ void dpm_resume(pm_message_t state) struct device *dev; ktime_t starttime = ktime_get(); + trace_suspend_resume(TPS("dpm_resume"), state.event, true); might_sleep(); mutex_lock(&dpm_list_mtx); @@ -875,6 +879,7 @@ void dpm_resume(pm_message_t state) dpm_show_time(starttime, state, NULL); cpufreq_resume(); + trace_suspend_resume(TPS("dpm_resume"), state.event, false); } /** @@ -913,7 +918,9 @@ static void device_complete(struct device *dev, pm_message_t state) if (callback) { pm_dev_dbg(dev, state, info); + trace_device_pm_callback_start(dev, info, state.event); callback(dev); + trace_device_pm_callback_end(dev, 0); } device_unlock(dev); @@ -932,6 +939,7 @@ void dpm_complete(pm_message_t state) { struct list_head list; + trace_suspend_resume(TPS("dpm_complete"), state.event, true); might_sleep(); INIT_LIST_HEAD(&list); @@ -951,6 +959,7 @@ void dpm_complete(pm_message_t state) } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); + trace_suspend_resume(TPS("dpm_complete"), state.event, false); } /** @@ -1086,6 +1095,7 @@ static int dpm_suspend_noirq(pm_message_t state) ktime_t starttime = ktime_get(); int error = 0; + trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); cpuidle_pause(); suspend_device_irqs(); mutex_lock(&dpm_list_mtx); @@ -1126,6 +1136,7 @@ static int dpm_suspend_noirq(pm_message_t state) } else { dpm_show_time(starttime, state, "noirq"); } + trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false); return error; } @@ -1222,6 +1233,7 @@ static int dpm_suspend_late(pm_message_t state) ktime_t starttime = ktime_get(); int error = 0; + trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -1257,6 +1269,7 @@ static int dpm_suspend_late(pm_message_t state) } else { dpm_show_time(starttime, state, "late"); } + trace_suspend_resume(TPS("dpm_suspend_late"), state.event, false); return error; } @@ -1295,7 +1308,9 @@ static int legacy_suspend(struct device *dev, pm_message_t state, calltime = initcall_debug_start(dev); + trace_device_pm_callback_start(dev, info, state.event); error = cb(dev, state); + trace_device_pm_callback_end(dev, error); suspend_report_result(cb, error); initcall_debug_report(dev, calltime, error, state, info); @@ -1461,6 +1476,7 @@ int dpm_suspend(pm_message_t state) ktime_t starttime = ktime_get(); int error = 0; + trace_suspend_resume(TPS("dpm_suspend"), state.event, true); might_sleep(); cpufreq_suspend(); @@ -1498,6 +1514,7 @@ int dpm_suspend(pm_message_t state) dpm_save_failed_step(SUSPEND_SUSPEND); } else dpm_show_time(starttime, state, NULL); + trace_suspend_resume(TPS("dpm_suspend"), state.event, false); return error; } @@ -1549,8 +1566,11 @@ static int device_prepare(struct device *dev, pm_message_t state) callback = dev->driver->pm->prepare; } - if (callback) + if (callback) { + trace_device_pm_callback_start(dev, info, state.event); ret = callback(dev); + trace_device_pm_callback_end(dev, ret); + } device_unlock(dev); @@ -1582,6 +1602,7 @@ int dpm_prepare(pm_message_t state) { int error = 0; + trace_suspend_resume(TPS("dpm_prepare"), state.event, true); might_sleep(); mutex_lock(&dpm_list_mtx); @@ -1612,6 +1633,7 @@ int dpm_prepare(pm_message_t state) put_device(dev); } mutex_unlock(&dpm_list_mtx); + trace_suspend_resume(TPS("dpm_prepare"), state.event, false); return error; } diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index e8d11b6630ee..dbb8350ea8dc 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -10,6 +10,7 @@ #include #include #include +#include static LIST_HEAD(syscore_ops_list); static DEFINE_MUTEX(syscore_ops_lock); @@ -49,6 +50,7 @@ int syscore_suspend(void) struct syscore_ops *ops; int ret = 0; + trace_suspend_resume(TPS("syscore_suspend"), 0, true); pr_debug("Checking wakeup interrupts\n"); /* Return error code if there are any wakeup interrupts pending. */ @@ -70,6 +72,7 @@ int syscore_suspend(void) "Interrupts enabled after %pF\n", ops->suspend); } + trace_suspend_resume(TPS("syscore_suspend"), 0, false); return 0; err_out: @@ -92,6 +95,7 @@ void syscore_resume(void) { struct syscore_ops *ops; + trace_suspend_resume(TPS("syscore_resume"), 0, true); WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core resume.\n"); @@ -103,6 +107,7 @@ void syscore_resume(void) WARN_ONCE(!irqs_disabled(), "Interrupts enabled after %pF\n", ops->resume); } + trace_suspend_resume(TPS("syscore_resume"), 0, false); } EXPORT_SYMBOL_GPL(syscore_resume); #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 1fbe11f2a146..e473d6555f96 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -185,7 +185,7 @@ config CPU_FREQ_GOV_CONSERVATIVE config GENERIC_CPUFREQ_CPU0 tristate "Generic CPU0 cpufreq driver" - depends on HAVE_CLK && REGULATOR && OF && THERMAL && CPU_THERMAL + depends on HAVE_CLK && OF select PM_OPP help This adds a generic cpufreq driver for CPU0 frequency management. diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 36d20d0fce27..ebac67115009 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -5,8 +5,7 @@ # big LITTLE core layer and glue drivers config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" - depends on (BIG_LITTLE && ARM_CPU_TOPOLOGY) || (ARM64 && SMP) - depends on HAVE_CLK + depends on ARM && BIG_LITTLE && ARM_CPU_TOPOLOGY && HAVE_CLK select PM_OPP help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index 09b9129c7bd3..ee1ae303a07c 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -104,7 +104,7 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy) } static struct cpufreq_driver cpu0_cpufreq_driver = { - .flags = CPUFREQ_STICKY, + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = cpu0_set_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ae11dd51f81d..aed2b0cb83dc 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1816,20 +1816,55 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); * GOVERNORS * *********************************************************************/ +/* Must set freqs->new to intermediate frequency */ +static int __target_intermediate(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs, int index) +{ + int ret; + + freqs->new = cpufreq_driver->get_intermediate(policy, index); + + /* We don't need to switch to intermediate freq */ + if (!freqs->new) + return 0; + + pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n", + __func__, policy->cpu, freqs->old, freqs->new); + + cpufreq_freq_transition_begin(policy, freqs); + ret = cpufreq_driver->target_intermediate(policy, index); + cpufreq_freq_transition_end(policy, freqs, ret); + + if (ret) + pr_err("%s: Failed to change to intermediate frequency: %d\n", + __func__, ret); + + return ret; +} + static int __target_index(struct cpufreq_policy *policy, struct cpufreq_frequency_table *freq_table, int index) { - struct cpufreq_freqs freqs; + struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0}; + unsigned int intermediate_freq = 0; int retval = -EINVAL; bool notify; notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION); - if (notify) { - freqs.old = policy->cur; - freqs.new = freq_table[index].frequency; - freqs.flags = 0; + /* Handle switching to intermediate frequency */ + if (cpufreq_driver->get_intermediate) { + retval = __target_intermediate(policy, &freqs, index); + if (retval) + return retval; + intermediate_freq = freqs.new; + /* Set old freq to intermediate */ + if (intermediate_freq) + freqs.old = freqs.new; + } + + freqs.new = freq_table[index].frequency; pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n", __func__, policy->cpu, freqs.old, freqs.new); @@ -1841,9 +1876,23 @@ static int __target_index(struct cpufreq_policy *policy, pr_err("%s: Failed to change cpu frequency: %d\n", __func__, retval); - if (notify) + if (notify) { cpufreq_freq_transition_end(policy, &freqs, retval); + /* + * Failed after setting to intermediate freq? Driver should have + * reverted back to initial frequency and so should we. Check + * here for intermediate_freq instead of get_intermediate, in + * case we have't switched to intermediate freq at all. + */ + if (unlikely(retval && intermediate_freq)) { + freqs.old = intermediate_freq; + freqs.new = policy->restore_freq; + cpufreq_freq_transition_begin(policy, &freqs); + cpufreq_freq_transition_end(policy, &freqs, 0); + } + } + return retval; } @@ -1875,6 +1924,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (target_freq == policy->cur) return 0; + /* Save last value to restore later on errors */ + policy->restore_freq = policy->cur; + if (cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); else if (cpufreq_driver->target_index) { @@ -2361,7 +2413,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) !(driver_data->setpolicy || driver_data->target_index || driver_data->target) || (driver_data->setpolicy && (driver_data->target_index || - driver_data->target))) + driver_data->target)) || + (!!driver_data->get_intermediate != !!driver_data->target_intermediate)) return -EINVAL; pr_debug("trying to register driver %s\n", driver_data->name); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e1c6433b16e0..1b44496b2d2b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -36,14 +36,29 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpufreq_policy *policy; + unsigned int sampling_rate; unsigned int max_load = 0; unsigned int ignore_nice; unsigned int j; - if (dbs_data->cdata->governor == GOV_ONDEMAND) + if (dbs_data->cdata->governor == GOV_ONDEMAND) { + struct od_cpu_dbs_info_s *od_dbs_info = + dbs_data->cdata->get_cpu_dbs_info_s(cpu); + + /* + * Sometimes, the ondemand governor uses an additional + * multiplier to give long delays. So apply this multiplier to + * the 'sampling_rate', so as to keep the wake-up-from-idle + * detection logic a bit conservative. + */ + sampling_rate = od_tuners->sampling_rate; + sampling_rate *= od_dbs_info->rate_mult; + ignore_nice = od_tuners->ignore_nice_load; - else + } else { + sampling_rate = cs_tuners->sampling_rate; ignore_nice = cs_tuners->ignore_nice_load; + } policy = cdbs->cur_policy; @@ -96,7 +111,46 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) if (unlikely(!wall_time || wall_time < idle_time)) continue; - load = 100 * (wall_time - idle_time) / wall_time; + /* + * If the CPU had gone completely idle, and a task just woke up + * on this CPU now, it would be unfair to calculate 'load' the + * usual way for this elapsed time-window, because it will show + * near-zero load, irrespective of how CPU intensive that task + * actually is. This is undesirable for latency-sensitive bursty + * workloads. + * + * To avoid this, we reuse the 'load' from the previous + * time-window and give this task a chance to start with a + * reasonably high CPU frequency. (However, we shouldn't over-do + * this copy, lest we get stuck at a high load (high frequency) + * for too long, even when the current system load has actually + * dropped down. So we perform the copy only once, upon the + * first wake-up from idle.) + * + * Detecting this situation is easy: the governor's deferrable + * timer would not have fired during CPU-idle periods. Hence + * an unusually large 'wall_time' (as compared to the sampling + * rate) indicates this scenario. + * + * prev_load can be zero in two cases and we must recalculate it + * for both cases: + * - during long idle intervals + * - explicitly set to zero + */ + if (unlikely(wall_time > (2 * sampling_rate) && + j_cdbs->prev_load)) { + load = j_cdbs->prev_load; + + /* + * Perform a destructive copy, to ensure that we copy + * the previous load only once, upon the first wake-up + * from idle. + */ + j_cdbs->prev_load = 0; + } else { + load = 100 * (wall_time - idle_time) / wall_time; + j_cdbs->prev_load = load; + } if (load > max_load) max_load = load; @@ -318,11 +372,18 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); + unsigned int prev_load; j_cdbs->cpu = j; j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); + + prev_load = (unsigned int) + (j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle); + j_cdbs->prev_load = 100 * prev_load / + (unsigned int) j_cdbs->prev_cpu_wall; + if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index bfb9ae14142c..cc401d147e72 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -134,6 +134,13 @@ struct cpu_dbs_common_info { u64 prev_cpu_idle; u64 prev_cpu_wall; u64 prev_cpu_nice; + /* + * Used to keep track of load in the previous interval. However, when + * explicitly set to zero, it is used as a flag to ensure that we copy + * the previous load to the current interval only once, upon the first + * wake-up from idle. + */ + unsigned int prev_load; struct cpufreq_policy *cur_policy; struct delayed_work work; /* diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index aebd4572eb6d..4e7f492ad583 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -691,14 +691,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static int intel_pstate_init_cpu(unsigned int cpunum) { - - const struct x86_cpu_id *id; struct cpudata *cpu; - id = x86_match_cpu(intel_pstate_cpu_ids); - if (!id) - return -ENODEV; - all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); if (!all_cpu_data[cpunum]) return -ENOMEM; diff --git a/drivers/cpufreq/ppc-corenet-cpufreq.c b/drivers/cpufreq/ppc-corenet-cpufreq.c index 0af618abebaf..3607070797af 100644 --- a/drivers/cpufreq/ppc-corenet-cpufreq.c +++ b/drivers/cpufreq/ppc-corenet-cpufreq.c @@ -138,7 +138,7 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy) struct cpufreq_frequency_table *table; struct cpu_data *data; unsigned int cpu = policy->cpu; - u64 transition_latency_hz; + u64 u64temp; np = of_get_cpu_node(cpu, NULL); if (!np) @@ -206,9 +206,10 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy) for_each_cpu(i, per_cpu(cpu_mask, cpu)) per_cpu(cpu_data, i) = data; - transition_latency_hz = 12ULL * NSEC_PER_SEC; - policy->cpuinfo.transition_latency = - do_div(transition_latency_hz, fsl_get_sys_freq()); + /* Minimum transition latency is 12 platform clocks */ + u64temp = 12ULL * NSEC_PER_SEC; + do_div(u64temp, fsl_get_sys_freq()); + policy->cpuinfo.transition_latency = u64temp + 1; of_node_put(np); diff --git a/drivers/cpufreq/tegra-cpufreq.c b/drivers/cpufreq/tegra-cpufreq.c index 6e774c6ac20b..8084c7f7e206 100644 --- a/drivers/cpufreq/tegra-cpufreq.c +++ b/drivers/cpufreq/tegra-cpufreq.c @@ -45,46 +45,54 @@ static struct clk *cpu_clk; static struct clk *pll_x_clk; static struct clk *pll_p_clk; static struct clk *emc_clk; +static bool pll_x_prepared; -static int tegra_cpu_clk_set_rate(unsigned long rate) +static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy, + unsigned int index) +{ + unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000; + + /* + * Don't switch to intermediate freq if: + * - we are already at it, i.e. policy->cur == ifreq + * - index corresponds to ifreq + */ + if ((freq_table[index].frequency == ifreq) || (policy->cur == ifreq)) + return 0; + + return ifreq; +} + +static int tegra_target_intermediate(struct cpufreq_policy *policy, + unsigned int index) { int ret; /* * Take an extra reference to the main pll so it doesn't turn - * off when we move the cpu off of it + * off when we move the cpu off of it as enabling it again while we + * switch to it from tegra_target() would take additional time. + * + * When target-freq is equal to intermediate freq we don't need to + * switch to an intermediate freq and so this routine isn't called. + * Also, we wouldn't be using pll_x anymore and must not take extra + * reference to it, as it can be disabled now to save some power. */ clk_prepare_enable(pll_x_clk); ret = clk_set_parent(cpu_clk, pll_p_clk); - if (ret) { - pr_err("Failed to switch cpu to clock pll_p\n"); - goto out; - } + if (ret) + clk_disable_unprepare(pll_x_clk); + else + pll_x_prepared = true; - if (rate == clk_get_rate(pll_p_clk)) - goto out; - - ret = clk_set_rate(pll_x_clk, rate); - if (ret) { - pr_err("Failed to change pll_x to %lu\n", rate); - goto out; - } - - ret = clk_set_parent(cpu_clk, pll_x_clk); - if (ret) { - pr_err("Failed to switch cpu to clock pll_x\n"); - goto out; - } - -out: - clk_disable_unprepare(pll_x_clk); return ret; } static int tegra_target(struct cpufreq_policy *policy, unsigned int index) { unsigned long rate = freq_table[index].frequency; + unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000; int ret = 0; /* @@ -98,10 +106,30 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index) else clk_set_rate(emc_clk, 100000000); /* emc 50Mhz */ - ret = tegra_cpu_clk_set_rate(rate * 1000); + /* + * target freq == pll_p, don't need to take extra reference to pll_x_clk + * as it isn't used anymore. + */ + if (rate == ifreq) + return clk_set_parent(cpu_clk, pll_p_clk); + + ret = clk_set_rate(pll_x_clk, rate * 1000); + /* Restore to earlier frequency on error, i.e. pll_x */ if (ret) - pr_err("cpu-tegra: Failed to set cpu frequency to %lu kHz\n", - rate); + pr_err("Failed to change pll_x to %lu\n", rate); + + ret = clk_set_parent(cpu_clk, pll_x_clk); + /* This shouldn't fail while changing or restoring */ + WARN_ON(ret); + + /* + * Drop count to pll_x clock only if we switched to intermediate freq + * earlier while transitioning to a target frequency. + */ + if (pll_x_prepared) { + clk_disable_unprepare(pll_x_clk); + pll_x_prepared = false; + } return ret; } @@ -137,16 +165,18 @@ static int tegra_cpu_exit(struct cpufreq_policy *policy) } static struct cpufreq_driver tegra_cpufreq_driver = { - .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = cpufreq_generic_frequency_table_verify, - .target_index = tegra_target, - .get = cpufreq_generic_get, - .init = tegra_cpu_init, - .exit = tegra_cpu_exit, - .name = "tegra", - .attr = cpufreq_generic_attr, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .verify = cpufreq_generic_frequency_table_verify, + .get_intermediate = tegra_get_intermediate, + .target_intermediate = tegra_target_intermediate, + .target_index = tegra_target, + .get = cpufreq_generic_get, + .init = tegra_cpu_init, + .exit = tegra_cpu_exit, + .name = "tegra", + .attr = cpufreq_generic_attr, #ifdef CONFIG_PM - .suspend = cpufreq_generic_suspend, + .suspend = cpufreq_generic_suspend, #endif }; diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index 2b859249303b..b0e61bf261a7 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -142,6 +142,16 @@ static inline acpi_handle func_to_handle(struct acpiphp_func *func) return func_to_acpi_device(func)->handle; } +struct acpiphp_root_context { + struct acpi_hotplug_context hp; + struct acpiphp_bridge *root_bridge; +}; + +static inline struct acpiphp_root_context *to_acpiphp_root_context(struct acpi_hotplug_context *hp) +{ + return container_of(hp, struct acpiphp_root_context, hp); +} + /* * struct acpiphp_attention_info - device specific attention registration * diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 75e178330215..91aa3d780138 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -373,17 +373,13 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data, static struct acpiphp_bridge *acpiphp_dev_to_bridge(struct acpi_device *adev) { - struct acpiphp_context *context; struct acpiphp_bridge *bridge = NULL; acpi_lock_hp_context(); - context = acpiphp_get_context(adev); - if (context) { - bridge = context->bridge; + if (adev->hp) { + bridge = to_acpiphp_root_context(adev->hp)->root_bridge; if (bridge) get_bridge(bridge); - - acpiphp_put_context(context); } acpi_unlock_hp_context(); return bridge; @@ -881,7 +877,17 @@ void acpiphp_enumerate_slots(struct pci_bus *bus) */ get_device(&bus->dev); - if (!pci_is_root_bus(bridge->pci_bus)) { + acpi_lock_hp_context(); + if (pci_is_root_bus(bridge->pci_bus)) { + struct acpiphp_root_context *root_context; + + root_context = kzalloc(sizeof(*root_context), GFP_KERNEL); + if (!root_context) + goto err; + + root_context->root_bridge = bridge; + acpi_set_hp_context(adev, &root_context->hp, NULL, NULL, NULL); + } else { struct acpiphp_context *context; /* @@ -890,21 +896,16 @@ void acpiphp_enumerate_slots(struct pci_bus *bus) * parent is going to be handled by pciehp, in which case this * bridge is not interesting to us either. */ - acpi_lock_hp_context(); context = acpiphp_get_context(adev); - if (!context) { - acpi_unlock_hp_context(); - put_device(&bus->dev); - pci_dev_put(bridge->pci_dev); - kfree(bridge); - return; - } + if (!context) + goto err; + bridge->context = context; context->bridge = bridge; /* Get a reference to the parent bridge. */ get_bridge(context->func.parent); - acpi_unlock_hp_context(); } + acpi_unlock_hp_context(); /* Must be added to the list prior to calling acpiphp_add_context(). */ mutex_lock(&bridge_mutex); @@ -919,6 +920,30 @@ void acpiphp_enumerate_slots(struct pci_bus *bus) cleanup_bridge(bridge); put_bridge(bridge); } + return; + + err: + acpi_unlock_hp_context(); + put_device(&bus->dev); + pci_dev_put(bridge->pci_dev); + kfree(bridge); +} + +void acpiphp_drop_bridge(struct acpiphp_bridge *bridge) +{ + if (pci_is_root_bus(bridge->pci_bus)) { + struct acpiphp_root_context *root_context; + struct acpi_device *adev; + + acpi_lock_hp_context(); + adev = ACPI_COMPANION(bridge->pci_bus->bridge); + root_context = to_acpiphp_root_context(adev->hp); + adev->hp = NULL; + acpi_unlock_hp_context(); + kfree(root_context); + } + cleanup_bridge(bridge); + put_bridge(bridge); } /** @@ -936,8 +961,7 @@ void acpiphp_remove_slots(struct pci_bus *bus) list_for_each_entry(bridge, &bridge_list, list) if (bridge->pci_bus == bus) { mutex_unlock(&bridge_mutex); - cleanup_bridge(bridge); - put_bridge(bridge); + acpiphp_drop_bridge(bridge); return; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 3f458896d45c..ec4112d257bc 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -75,6 +75,7 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ + unsigned int restore_freq; /* = policy->cur before transition */ unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ @@ -221,11 +222,35 @@ struct cpufreq_driver { /* define one out of two */ int (*setpolicy) (struct cpufreq_policy *policy); + + /* + * On failure, should always restore frequency to policy->restore_freq + * (i.e. old freq). + */ int (*target) (struct cpufreq_policy *policy, /* Deprecated */ unsigned int target_freq, unsigned int relation); int (*target_index) (struct cpufreq_policy *policy, unsigned int index); + /* + * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION + * unset. + * + * get_intermediate should return a stable intermediate frequency + * platform wants to switch to and target_intermediate() should set CPU + * to to that frequency, before jumping to the frequency corresponding + * to 'index'. Core will take care of sending notifications and driver + * doesn't have to handle them in target_intermediate() or + * target_index(). + * + * Drivers can return '0' from get_intermediate() in case they don't + * wish to switch to intermediate frequency for some target frequency. + * In that case core will directly call ->target_index(). + */ + unsigned int (*get_intermediate)(struct cpufreq_policy *policy, + unsigned int index); + int (*target_intermediate)(struct cpufreq_policy *policy, + unsigned int index); /* should be defined, if possible */ unsigned int (*get) (unsigned int cpu); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9a7e08d61258..d19840b0cac8 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -7,6 +7,9 @@ #include #include #include +#include + +#define TPS(x) tracepoint_string(x) DECLARE_EVENT_CLASS(cpu, @@ -90,6 +93,17 @@ TRACE_EVENT(pstate_sample, #define PWR_EVENT_EXIT -1 #endif +#define pm_verb_symbolic(event) \ + __print_symbolic(event, \ + { PM_EVENT_SUSPEND, "suspend" }, \ + { PM_EVENT_RESUME, "resume" }, \ + { PM_EVENT_FREEZE, "freeze" }, \ + { PM_EVENT_QUIESCE, "quiesce" }, \ + { PM_EVENT_HIBERNATE, "hibernate" }, \ + { PM_EVENT_THAW, "thaw" }, \ + { PM_EVENT_RESTORE, "restore" }, \ + { PM_EVENT_RECOVER, "recover" }) + DEFINE_EVENT(cpu, cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu_id), @@ -97,58 +111,76 @@ DEFINE_EVENT(cpu, cpu_frequency, TP_ARGS(frequency, cpu_id) ); -TRACE_EVENT(machine_suspend, +TRACE_EVENT(device_pm_callback_start, - TP_PROTO(unsigned int state), + TP_PROTO(struct device *dev, const char *pm_ops, int event), - TP_ARGS(state), - - TP_STRUCT__entry( - __field( u32, state ) - ), - - TP_fast_assign( - __entry->state = state; - ), - - TP_printk("state=%lu", (unsigned long)__entry->state) -); - -TRACE_EVENT(device_pm_report_time, - - TP_PROTO(struct device *dev, const char *pm_ops, s64 ops_time, - char *pm_event_str, int error), - - TP_ARGS(dev, pm_ops, ops_time, pm_event_str, error), + TP_ARGS(dev, pm_ops, event), TP_STRUCT__entry( __string(device, dev_name(dev)) __string(driver, dev_driver_string(dev)) __string(parent, dev->parent ? dev_name(dev->parent) : "none") __string(pm_ops, pm_ops ? pm_ops : "none ") - __string(pm_event_str, pm_event_str) - __field(s64, ops_time) + __field(int, event) + ), + + TP_fast_assign( + __assign_str(device, dev_name(dev)); + __assign_str(driver, dev_driver_string(dev)); + __assign_str(parent, + dev->parent ? dev_name(dev->parent) : "none"); + __assign_str(pm_ops, pm_ops ? pm_ops : "none "); + __entry->event = event; + ), + + TP_printk("%s %s, parent: %s, %s[%s]", __get_str(driver), + __get_str(device), __get_str(parent), __get_str(pm_ops), + pm_verb_symbolic(__entry->event)) +); + +TRACE_EVENT(device_pm_callback_end, + + TP_PROTO(struct device *dev, int error), + + TP_ARGS(dev, error), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __string(driver, dev_driver_string(dev)) __field(int, error) ), TP_fast_assign( - const char *tmp = dev->parent ? dev_name(dev->parent) : "none"; - const char *tmp_i = pm_ops ? pm_ops : "none "; - __assign_str(device, dev_name(dev)); __assign_str(driver, dev_driver_string(dev)); - __assign_str(parent, tmp); - __assign_str(pm_ops, tmp_i); - __assign_str(pm_event_str, pm_event_str); - __entry->ops_time = ops_time; __entry->error = error; ), - /* ops_str has an extra space at the end */ - TP_printk("%s %s parent=%s state=%s ops=%snsecs=%lld err=%d", - __get_str(driver), __get_str(device), __get_str(parent), - __get_str(pm_event_str), __get_str(pm_ops), - __entry->ops_time, __entry->error) + TP_printk("%s %s, err=%d", + __get_str(driver), __get_str(device), __entry->error) +); + +TRACE_EVENT(suspend_resume, + + TP_PROTO(const char *action, int val, bool start), + + TP_ARGS(action, val, start), + + TP_STRUCT__entry( + __field(const char *, action) + __field(int, val) + __field(bool, start) + ), + + TP_fast_assign( + __entry->action = action; + __entry->val = val; + __entry->start = start; + ), + + TP_printk("%s[%u] %s", __entry->action, (unsigned int)__entry->val, + (__entry->start)?"begin":"end") ); DECLARE_EVENT_CLASS(wakeup_source, diff --git a/kernel/cpu.c b/kernel/cpu.c index acf791c55b71..a343bde710b1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "smpboot.h" @@ -520,7 +521,9 @@ int disable_nonboot_cpus(void) for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; + trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1); + trace_suspend_resume(TPS("CPU_OFF"), cpu, false); if (!error) cpumask_set_cpu(cpu, frozen_cpus); else { @@ -563,7 +566,9 @@ void __ref enable_nonboot_cpus(void) arch_enable_nonboot_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { + trace_suspend_resume(TPS("CPU_ON"), cpu, true); error = _cpu_up(cpu, 1); + trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { pr_info("CPU%d is up\n", cpu); continue; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index df88d55dc436..49e0a20fd010 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "power.h" @@ -292,7 +293,9 @@ static int create_image(int platform_mode) in_suspend = 1; save_processor_state(); + trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true); error = swsusp_arch_suspend(); + trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); if (error) printk(KERN_ERR "PM: Error %d creating hibernation image\n", error); diff --git a/kernel/power/process.c b/kernel/power/process.c index 06ec8869dbf1..0ca8d83e2369 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Timeout for stopping processes @@ -175,6 +176,7 @@ void thaw_processes(void) struct task_struct *g, *p; struct task_struct *curr = current; + trace_suspend_resume(TPS("thaw_processes"), 0, true); if (pm_freezing) atomic_dec(&system_freezing_cnt); pm_freezing = false; @@ -201,6 +203,7 @@ void thaw_processes(void) schedule(); printk("done.\n"); + trace_suspend_resume(TPS("thaw_processes"), 0, false); } void thaw_kernel_threads(void) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 963e6d0f050b..4dd8822f732a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -177,7 +177,9 @@ static int suspend_prepare(suspend_state_t state) if (error) goto Finish; + trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); + trace_suspend_resume(TPS("freeze_processes"), 0, false); if (!error) return 0; @@ -240,7 +242,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) * all the devices are suspended. */ if (state == PM_SUSPEND_FREEZE) { + trace_suspend_resume(TPS("machine_suspend"), state, true); freeze_enter(); + trace_suspend_resume(TPS("machine_suspend"), state, false); goto Platform_wake; } @@ -256,7 +260,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (!error) { *wakeup = pm_wakeup_pending(); if (!(suspend_test(TEST_CORE) || *wakeup)) { + trace_suspend_resume(TPS("machine_suspend"), + state, true); error = suspend_ops->enter(state); + trace_suspend_resume(TPS("machine_suspend"), + state, false); events_check_enabled = false; } syscore_resume(); @@ -294,7 +302,6 @@ int suspend_devices_and_enter(suspend_state_t state) if (need_suspend_ops(state) && !suspend_ops) return -ENOSYS; - trace_machine_suspend(state); if (need_suspend_ops(state) && suspend_ops->begin) { error = suspend_ops->begin(state); if (error) @@ -331,7 +338,6 @@ int suspend_devices_and_enter(suspend_state_t state) else if (state == PM_SUSPEND_FREEZE && freeze_ops->end) freeze_ops->end(); - trace_machine_suspend(PWR_EVENT_EXIT); return error; Recover_platform: @@ -365,6 +371,7 @@ static int enter_state(suspend_state_t state) { int error; + trace_suspend_resume(TPS("suspend_enter"), state, true); if (state == PM_SUSPEND_FREEZE) { #ifdef CONFIG_PM_DEBUG if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) { @@ -382,9 +389,11 @@ static int enter_state(suspend_state_t state) if (state == PM_SUSPEND_FREEZE) freeze_begin(); + trace_suspend_resume(TPS("sync_filesystems"), 0, true); printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); printk("done.\n"); + trace_suspend_resume(TPS("sync_filesystems"), 0, false); pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label); error = suspend_prepare(state); @@ -394,6 +403,7 @@ static int enter_state(suspend_state_t state) if (suspend_test(TEST_FREEZER)) goto Finish; + trace_suspend_resume(TPS("suspend_enter"), state, false); pr_debug("PM: Entering %s sleep\n", pm_states[state].label); pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state);