From 5a25e3f7cc538fb49e11267c1e41c54ccf83835e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Mar 2019 12:15:13 +0100 Subject: [PATCH 01/72] cpufreq: intel_pstate: Driver-specific handling of _PPC updates In some cases, the platform firmware disables or enables turbo frequencies for all CPUs globally before triggering a _PPC change notification for one of them. Obviously, that global change affects all CPUs, not just the notified one, and it needs to be acted upon by cpufreq. The intel_pstate driver is able to detect such global changes of the settings, but it also needs to update policy limits for all CPUs if that happens, in particular if turbo frequencies are enabled globally - to allow them to be used. For this reason, introduce a new cpufreq driver callback to be invoked on _PPC notifications, if present, instead of simply calling cpufreq_update_policy() for the notified CPU and make intel_pstate use it to trigger policy updates for all CPUs in the system if global settings change. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200759 Reported-by: Gabriele Mazzotta Tested-by: Gabriele Mazzotta Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/acpi/processor_perflib.c | 2 +- drivers/cpufreq/cpufreq.c | 16 ++++++++++++++++ drivers/cpufreq/intel_pstate.c | 24 ++++++++++++++++++++++++ include/linux/cpufreq.h | 4 ++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index a303fd0e108c..c73d3a62799a 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -181,7 +181,7 @@ void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag) acpi_processor_ppc_ost(pr->handle, 0); } if (ret >= 0) - cpufreq_update_policy(pr->id); + cpufreq_update_limits(pr->id); } int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e10922709d13..bb63347f6af1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2370,6 +2370,22 @@ unlock: } EXPORT_SYMBOL(cpufreq_update_policy); +/** + * cpufreq_update_limits - Update policy limits for a given CPU. + * @cpu: CPU to update the policy limits for. + * + * Invoke the driver's ->update_limits callback if present or call + * cpufreq_update_policy() for @cpu. + */ +void cpufreq_update_limits(unsigned int cpu) +{ + if (cpufreq_driver->update_limits) + cpufreq_driver->update_limits(cpu); + else + cpufreq_update_policy(cpu); +} +EXPORT_SYMBOL_GPL(cpufreq_update_limits); + /********************************************************************* * BOOST * *********************************************************************/ diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b599c7318aab..e2191a570ade 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,6 +179,7 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. 
+ * @turbo_disabled_s: Saved @turbo_disabled value. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -187,6 +188,7 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; + bool turbo_disabled_s; int max_perf_pct; int min_perf_pct; }; @@ -897,6 +899,25 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void intel_pstate_update_limits(unsigned int cpu) +{ + mutex_lock(&intel_pstate_driver_lock); + + update_turbo_state(); + /* + * If turbo has been turned on or off globally, policy limits for + * all CPUs need to be updated to reflect that. + */ + if (global.turbo_disabled_s != global.turbo_disabled) { + global.turbo_disabled_s = global.turbo_disabled; + intel_pstate_update_policies(); + } else { + cpufreq_update_policy(cpu); + } + + mutex_unlock(&intel_pstate_driver_lock); +} + /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ @@ -2138,6 +2159,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); + global.turbo_disabled_s = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? 
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; @@ -2182,6 +2204,7 @@ static struct cpufreq_driver intel_pstate = { .init = intel_pstate_cpu_init, .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, + .update_limits = intel_pstate_update_limits, .name = "intel_pstate", }; @@ -2316,6 +2339,7 @@ static struct cpufreq_driver intel_cpufreq = { .init = intel_cpufreq_cpu_init, .exit = intel_pstate_cpu_exit, .stop_cpu = intel_cpufreq_stop_cpu, + .update_limits = intel_pstate_update_limits, .name = "intel_cpufreq", }; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b160e98076e3..5005ea40364f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -195,6 +195,7 @@ void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); void cpufreq_update_policy(unsigned int cpu); +void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); @@ -322,6 +323,9 @@ struct cpufreq_driver { /* should be defined, if possible */ unsigned int (*get)(unsigned int cpu); + /* Called to update policy limits on firmware notifications. */ + void (*update_limits)(unsigned int cpu); + /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); From 540a375822a40675868c5f62ae57b608a579e56a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Mar 2019 12:16:58 +0100 Subject: [PATCH 02/72] cpufreq: Add cpufreq_cpu_acquire() and cpufreq_cpu_release() It sometimes is necessary to find a cpufreq policy for a given CPU and acquire its rwsem (for writing) immediately after that, so introduce cpufreq_cpu_acquire() as a helper for that and the complementary cpufreq_cpu_release(). Make cpufreq_update_policy() use the new functions. 
Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 56 ++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bb63347f6af1..d8fc395af773 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -250,6 +250,51 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); +/** + * cpufreq_cpu_release - Unlock a policy and decrement its usage counter. + * @policy: cpufreq policy returned by cpufreq_cpu_acquire(). + */ +static void cpufreq_cpu_release(struct cpufreq_policy *policy) +{ + if (WARN_ON(!policy)) + return; + + lockdep_assert_held(&policy->rwsem); + + up_write(&policy->rwsem); + + cpufreq_cpu_put(policy); +} + +/** + * cpufreq_cpu_acquire - Find policy for a CPU, mark it as busy and lock it. + * @cpu: CPU to find the policy for. + * + * Call cpufreq_cpu_get() to get a reference on the cpufreq policy for @cpu and + * if the policy returned by it is not NULL, acquire its rwsem for writing. + * Return the policy if it is active or release it and return NULL otherwise. + * + * The policy returned by this function has to be released with the help of + * cpufreq_cpu_release() in order to release its rwsem and balance its usage + * counter properly. 
+ */ +static struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + + if (!policy) + return NULL; + + down_write(&policy->rwsem); + + if (policy_is_inactive(policy)) { + cpufreq_cpu_release(policy); + return NULL; + } + + return policy; +} + /********************************************************************* * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ @@ -2337,17 +2382,12 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, */ void cpufreq_update_policy(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); struct cpufreq_policy new_policy; if (!policy) return; - down_write(&policy->rwsem); - - if (policy_is_inactive(policy)) - goto unlock; - /* * BIOS might change freq behind our back * -> ask driver for current freq and notify governors about a change @@ -2364,9 +2404,7 @@ void cpufreq_update_policy(unsigned int cpu) cpufreq_set_policy(policy, &new_policy); unlock: - up_write(&policy->rwsem); - - cpufreq_cpu_put(policy); + cpufreq_cpu_release(policy); } EXPORT_SYMBOL(cpufreq_update_policy); From c324f43aed89b33eee37aaf022b32c31a2b3104d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 28 Mar 2019 14:33:58 +0100 Subject: [PATCH 03/72] cpuidle: exynos: Unify target residency for AFTR and coupled AFTR states Since commit 45f1ff59e27c ("cpuidle: Return nohz hint from cpuidle_select()") Exynos CPUidle driver stopped entering C1 (AFTR) mode on Exynos4412-based Trats2 board. Further analysis revealed that the CPUidle framework changed the way it handles predicted timer ticks and reported target residency for the given idle states. As a result, the C1 (AFTR) state was not chosen anymore on a completely idle device. The main issue was a too high target residency value.
The similar C1 (AFTR) state for 'coupled' CPUidle version used 10 times lower value for the target residency, despite the fact that it is the same state from the hardware perspective. The 100000us value for standard C1 (AFTR) mode has been there since the beginning of the support for this idle state, added by the commit 67173ca492ab ("ARM: EXYNOS: Add support AFTR mode on EXYNOS4210"). That commit doesn't give any reason for it, instead it looks like it was blindly copied from the WFI/IDLE state of the same driver at that time. At that time, that value was probably not really used by the framework for any critical decision, so it didn't matter that much. Now it turned out to be an issue, so unify the target residency with the 'coupled' version, as it seems to better match the real use case values and restores the operation of the Exynos CPUidle driver on the idle device. Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Acked-by: Daniel Lezcano Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-exynos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c index 0171a6e190d7..f7199a35cbb6 100644 --- a/drivers/cpuidle/cpuidle-exynos.c +++ b/drivers/cpuidle/cpuidle-exynos.c @@ -84,7 +84,7 @@ static struct cpuidle_driver exynos_idle_driver = { [1] = { .enter = exynos_enter_lowpower, .exit_latency = 300, - .target_residency = 100000, + .target_residency = 10000, .name = "C1", .desc = "ARM power down", }, From 74a1dd86d1739eae2015a3832f62c1d6546893a7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2019 10:24:56 -0700 Subject: [PATCH 04/72] PM / wakeup: Use pm_pr_dbg() instead of pr_debug() These prints are useful if we're doing PM suspend debugging. Having them at pr_debug() level means that we need to either enable DEBUG in this file, or compile the kernel with dynamic debug capabilities.
Both of these options have drawbacks like custom compilation or opting into all debug statements being included into the kernel image. Given that we already have infrastructure to collect PM debugging information with CONFIG_PM_DEBUG and friends, let's change the pr_debug usage here to be pm_pr_dbg() instead so we can collect the wakeup information in the kernel logs. Signed-off-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index bb1ae175fae1..23c243a4c675 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -804,7 +804,7 @@ void pm_print_active_wakeup_sources(void) srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { - pr_debug("active wakeup source: %s\n", ws->name); + pm_pr_dbg("active wakeup source: %s\n", ws->name); active = 1; } else if (!active && (!last_activity_ws || @@ -815,7 +815,7 @@ void pm_print_active_wakeup_sources(void) } if (!active && last_activity_ws) - pr_debug("last active wakeup source: %s\n", + pm_pr_dbg("last active wakeup source: %s\n", last_activity_ws->name); srcu_read_unlock(&wakeup_srcu, srcuidx); } @@ -845,7 +845,7 @@ bool pm_wakeup_pending(void) raw_spin_unlock_irqrestore(&events_lock, flags); if (ret) { - pr_debug("Wakeup pending, aborting suspend\n"); + pm_pr_dbg("Wakeup pending, aborting suspend\n"); pm_print_active_wakeup_sources(); } From b5dee3130bb4014511f5d0dd46855ed843e3fdc8 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:41 +0800 Subject: [PATCH 05/72] PM / sleep: Refactor filesystems sync to reduce duplication Create a common helper to sync filesystems for system suspend and hibernation. Signed-off-by: Harry Pan Acked-by: Pavel Machek [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- include/linux/suspend.h | 3 +++ kernel/power/hibernate.c | 5 +---- kernel/power/main.c | 9 +++++++++ kernel/power/suspend.c | 13 +++++-------- kernel/power/user.c | 5 +---- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3f529ad9a9d2..6b3ea9ea6a9e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -425,6 +425,7 @@ void restore_processor_state(void); /* kernel/power/main.c */ extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); +extern void ksys_sync_helper(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) return 0; } +static inline void ksys_sync_helper(void) {} + #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index abef759de7c8..cc105ecd9c07 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -709,9 +708,7 @@ int hibernate(void) goto Exit; } - pr_info("Syncing filesystems ... \n"); - ksys_sync(); - pr_info("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) diff --git a/kernel/power/main.c b/kernel/power/main.c index 98e76cad128b..40472a7c5536 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "power.h" @@ -51,6 +52,14 @@ void unlock_system_sleep(void) } EXPORT_SYMBOL_GPL(unlock_system_sleep); +void ksys_sync_helper(void) +{ + pr_info("Syncing filesystems ... 
"); + ksys_sync(); + pr_cont("done.\n"); +} +EXPORT_SYMBOL_GPL(ksys_sync_helper); + /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0bd595a0b610..e39059dea38b 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -568,13 +567,11 @@ static int enter_state(suspend_state_t state) if (state == PM_SUSPEND_TO_IDLE) s2idle_begin(); -#ifndef CONFIG_SUSPEND_SKIP_SYNC - trace_suspend_resume(TPS("sync_filesystems"), 0, true); - pr_info("Syncing filesystems ... "); - ksys_sync(); - pr_cont("done.\n"); - trace_suspend_resume(TPS("sync_filesystems"), 0, false); -#endif + if (!IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC)) { + trace_suspend_resume(TPS("sync_filesystems"), 0, true); + ksys_sync_helper(); + trace_suspend_resume(TPS("sync_filesystems"), 0, false); + } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); pm_suspend_clear_flags(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 2d8b60a3c86b..cb24e840a3e6 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include @@ -228,9 +227,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (data->frozen) break; - printk("Syncing filesystems ... "); - ksys_sync(); - printk("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) From c64546b17bc940643545dd34eac21f51764d633c Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:43 +0800 Subject: [PATCH 06/72] PM / sleep: Measure the time of filesystems syncing Measure the filesystems sync time during system sleep more precisely. Among other things, this allows the pr_cont() to be dropped from ksys_sync_helper() and makes automatic system suspend and hibernation profiling somewhat more straightforward. 
Signed-off-by: Harry Pan [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 40472a7c5536..4f43e724f6eb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -54,9 +54,14 @@ EXPORT_SYMBOL_GPL(unlock_system_sleep); void ksys_sync_helper(void) { - pr_info("Syncing filesystems ... "); + ktime_t start; + long elapsed_msecs; + + start = ktime_get(); ksys_sync(); - pr_cont("done.\n"); + elapsed_msecs = ktime_to_ms(ktime_sub(ktime_get(), start)); + pr_info("Filesystems sync: %ld.%03ld seconds\n", + elapsed_msecs / MSEC_PER_SEC, elapsed_msecs % MSEC_PER_SEC); } EXPORT_SYMBOL_GPL(ksys_sync_helper); From 5861381d486601430cccf64849bd0a226154bc0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2019 23:18:01 +0100 Subject: [PATCH 07/72] PM / arch: x86: Rework the MSR_IA32_ENERGY_PERF_BIAS handling The current handling of MSR_IA32_ENERGY_PERF_BIAS in the kernel is problematic, because it may cause changes made by user space to that MSR (with the help of the x86_energy_perf_policy tool, for example) to be lost every time a CPU goes offline and then back online as well as during system-wide power management transitions into sleep states and back into the working state. The first problem is that if the current EPB value for a CPU going online is 0 ('performance'), the kernel will change it to 6 ('normal') regardless of whether or not this is the first bring-up of that CPU. That also happens during system-wide resume from sleep states (including, but not limited to, hibernation). However, the EPB may have been adjusted by user space this way and the kernel should not blindly override that setting. 
The second problem is that if the platform firmware resets the EPB values for any CPUs during system-wide resume from a sleep state, the kernel will not restore their previous EPB values that may have been set by user space before the preceding system-wide suspend transition. Again, that behavior may at least be confusing from the user space perspective. In order to address these issues, rework the handling of MSR_IA32_ENERGY_PERF_BIAS so that the EPB value is saved on CPU offline and restored on CPU online as well as (for the boot CPU) during the syscore stages of system-wide suspend and resume transitions, respectively. However, retain the policy by which the EPB is set to 6 ('normal') on the first bring-up of each CPU if its initial value is 0, based on the observation that 0 may mean 'not initialized' just as well as 'performance' in that case. While at it, move the MSR_IA32_ENERGY_PERF_BIAS handling code into a separate file and document it in Documentation/admin-guide. Fixes: abe48b108247 (x86, intel, power: Initialize MSR_IA32_ENERGY_PERF_BIAS) Fixes: b51ef52df71c (x86/cpu: Restore MSR_IA32_ENERGY_PERF_BIAS after resume) Reported-by: Thomas Renninger Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Hannes Reinecke Acked-by: Borislav Petkov Acked-by: Thomas Gleixner --- Documentation/admin-guide/pm/intel_epb.rst | 6 + .../admin-guide/pm/working-state.rst | 1 + arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 17 --- arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/intel.c | 34 ----- arch/x86/kernel/cpu/intel_epb.c | 131 ++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 8 files changed, 140 insertions(+), 53 deletions(-) create mode 100644 Documentation/admin-guide/pm/intel_epb.rst create mode 100644 arch/x86/kernel/cpu/intel_epb.c diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst new file mode 100644 index 000000000000..e9cfa7ec5420 --- /dev/null +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -0,0 +1,6 @@ +====================================== +Intel Performance and Energy Bias Hint +====================================== + +.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c + :doc: overview diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index b6cef9b5e961..beb004d3632b 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -8,3 +8,4 @@ Working-State Power Management cpuidle cpufreq intel_pstate + intel_epb diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cfd24f9f7614..1796d2bdcaaa 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 
cb28e98a0659..5e37dfa4d9df 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1864,23 +1864,6 @@ void cpu_init(void) } #endif -static void bsp_resume(void) -{ - if (this_cpu->c_bsp_resume) - this_cpu->c_bsp_resume(&boot_cpu_data); -} - -static struct syscore_ops cpu_syscore_ops = { - .resume = bsp_resume, -}; - -static int __init init_cpu_syscore(void) -{ - register_syscore_ops(&cpu_syscore_ops); - return 0; -} -core_initcall(init_cpu_syscore); - /* * The microcode loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 5eb946b9a9f3..c0e2407abdd6 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -14,7 +14,6 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); void (*c_detect_tlb)(struct cpuinfo_x86 *); - void (*c_bsp_resume)(struct cpuinfo_x86 *); int c_x86_vendor; #ifdef CONFIG_X86_32 /* Optional vendor specific routine to obtain the cache size. */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fc3c07fe7df5..f17c1a714779 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -596,36 +596,6 @@ detect_keyid_bits: c->x86_phys_bits -= keyid_bits; } -static void init_intel_energy_perf(struct cpuinfo_x86 *c) -{ - u64 epb; - - /* - * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized. - * (x86_energy_perf_policy(8) is available to change it at run-time.) 
- */ - if (!cpu_has(c, X86_FEATURE_EPB)) - return; - - rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) - return; - - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; - wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); -} - -static void intel_bsp_resume(struct cpuinfo_x86 *c) -{ - /* - * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume, - * so reinitialize it properly like during bootup: - */ - init_intel_energy_perf(c); -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TME)) detect_tme(c); - init_intel_energy_perf(c); - init_intel_misc_features(c); } @@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = { .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_init = init_intel, - .c_bsp_resume = intel_bsp_resume, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); - diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c new file mode 100644 index 000000000000..8d53cc88bd22 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Performance and Energy Bias Hint support. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Rafael J. Wysocki + */ + +#include +#include +#include + +#include +#include + +/** + * DOC: overview + * + * The Performance and Energy Bias Hint (EPB) allows software to specify its + * preference with respect to the power-performance tradeoffs present in the + * processor. Generally, the EPB is expected to be set by user space through + * the generic MSR interface (with the help of the x86_energy_perf_policy tool), + * but there are two reasons for the kernel to touch it. 
+ * + * First, there are systems where the platform firmware resets the EPB during + * system-wide transitions from sleep states back into the working state + * effectively causing the previous EPB updates by user space to be lost. + * Thus the kernel needs to save the current EPB values for all CPUs during + * system-wide transitions to sleep states and restore them on the way back to + * the working state. That can be achieved by saving EPB for secondary CPUs + * when they are taken offline during transitions into system sleep states and + * for the boot CPU in a syscore suspend operation, so that it can be restored + * for the boot CPU in a syscore resume operation and for the other CPUs when + * they are brought back online. However, CPUs that are already offline when + * a system-wide PM transition is started are not taken offline again, but their + * EPB values may still be reset by the platform firmware during the transition, + * so in fact it is necessary to save the EPB of any CPU taken offline and to + * restore it when the given CPU goes back online at all times. + * + * Second, on many systems the initial EPB value coming from the platform + * firmware is 0 ('performance') and at least on some of them that is because + * the platform firmware does not initialize EPB at all with the assumption that + * the OS will do that anyway. That sometimes is problematic, as it may cause + * the system battery to drain too fast, for example, so it is better to adjust + * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the + * kernel changes it to 6 ('normal'). + */ + +static DEFINE_PER_CPU(u8, saved_epb); + +#define EPB_MASK 0x0fULL +#define EPB_SAVED 0x10ULL + +static int intel_epb_save(void) +{ + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + /* + * Ensure that saved_epb will always be nonzero after this write even if + * the EPB value read from the MSR is 0. 
+ */ + this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); + + return 0; +} + +static void intel_epb_restore(void) +{ + u64 val = this_cpu_read(saved_epb); + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + if (val) { + val &= EPB_MASK; + } else { + /* + * Because intel_epb_save() has not run for the current CPU yet, + * it is going online for the first time, so if its EPB value is + * 0 ('performance') at this point, assume that it has not been + * initialized by the platform firmware and set it to 6 + * ('normal'). + */ + val = epb & EPB_MASK; + if (val == ENERGY_PERF_BIAS_PERFORMANCE) { + val = ENERGY_PERF_BIAS_NORMAL; + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + } + } + wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); +} + +static struct syscore_ops intel_epb_syscore_ops = { + .suspend = intel_epb_save, + .resume = intel_epb_restore, +}; + +static int intel_epb_online(unsigned int cpu) +{ + intel_epb_restore(); + return 0; +} + +static int intel_epb_offline(unsigned int cpu) +{ + return intel_epb_save(); +} + +static __init int intel_epb_init(void) +{ + int ret; + + if (!boot_cpu_has(X86_FEATURE_EPB)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE, + "x86/intel/epb:online", intel_epb_online, + intel_epb_offline); + if (ret < 0) + goto err_out_online; + + register_syscore_ops(&intel_epb_syscore_ops); + return 0; + +err_out_online: + cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); + return ret; +} +subsys_initcall(intel_epb_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e78281d07b70..dbfdd0fadbef 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, + CPUHP_AP_X86_INTEL_EPB_ONLINE, CPUHP_AP_PERF_ONLINE, CPUHP_AP_PERF_X86_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE, From 
b9c273babce791cf228fc466577f55056a699f9c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2019 23:20:17 +0100 Subject: [PATCH 08/72] PM / arch: x86: MSR_IA32_ENERGY_PERF_BIAS sysfs interface The Performance and Energy Bias Hint (EPB) is expected to be set by user space through the generic MSR interface, but that interface is not particularly nice and there are security concerns regarding it, so it is not always available. For this reason, add a sysfs interface for reading and updating the EPB, in the form of a new attribute, energy_perf_bias, located under /sys/devices/system/cpu/cpu#/power/ for online CPUs that support the EPB feature. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hannes Reinecke Acked-by: Borislav Petkov --- .../ABI/testing/sysfs-devices-system-cpu | 18 ++++ Documentation/admin-guide/pm/intel_epb.rst | 27 ++++++ arch/x86/kernel/cpu/intel_epb.c | 93 ++++++++++++++++++- 3 files changed, 134 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 9605dbd4b5b5..7f4af7da3fbc 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -518,3 +518,21 @@ Description: Control Symetric Multi Threading (SMT) If control status is "forceoff" or "notsupported" writes are rejected. + +What: /sys/devices/system/cpu/cpu#/power/energy_perf_bias +Date: March 2019 +Contact: linux-pm@vger.kernel.org +Description: Intel Energy and Performance Bias Hint (EPB) + + EPB for the given CPU in a sliding scale 0 - 15, where a value + of 0 corresponds to a hint preference for highest performance + and a value of 15 corresponds to the maximum energy savings. 
+ + In order to change the EPB value for the CPU, write either + a number in the 0 - 15 sliding scale above, or one of the + strings: "performance", "balance-performance", "normal", + "balance-power", "power" (that represent values reflected by + their meaning), to this attribute. + + This attribute is present for all online CPUs supporting the + Intel EPB feature. diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst index e9cfa7ec5420..d100849edfc4 100644 --- a/Documentation/admin-guide/pm/intel_epb.rst +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -4,3 +4,30 @@ Intel Performance and Energy Bias Hint .. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c :doc: overview + +Intel Performance and Energy Bias Attribute in ``sysfs`` +======================================================== + +The Intel Performance and Energy Bias Hint (EPB) value for a given (logical) CPU +can be checked or updated through a ``sysfs`` attribute (file) under +:file:`/sys/devices/system/cpu/cpu<N>/power/`, where the CPU number ``<N>`` +is allocated at the system initialization time: + +``energy_perf_bias`` + Shows the current EPB value for the CPU in a sliding scale 0 - 15, where + a value of 0 corresponds to a hint preference for highest performance + and a value of 15 corresponds to the maximum energy savings. + + In order to update the EPB value for the CPU, this attribute can be + written to, either with a number in the 0 - 15 sliding scale above, or + with one of the strings: "performance", "balance-performance", "normal", + "balance-power", "power" that represent values reflected by their + meaning. + + This attribute is present for all online CPUs supporting the EPB + feature. + +Note that while the EPB interface to the processor is defined at the logical CPU +level, the physical register backing it may be shared by multiple CPUs (for +example, SMT siblings or cores in one package).
For this reason, updating the +EPB value for one CPU may cause the EPB values for other CPUs to change. diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index 8d53cc88bd22..f4dd73396f28 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -9,8 +9,12 @@ */ #include +#include +#include #include +#include #include +#include #include #include @@ -20,9 +24,9 @@ * * The Performance and Energy Bias Hint (EPB) allows software to specify its * preference with respect to the power-performance tradeoffs present in the - * processor. Generally, the EPB is expected to be set by user space through - * the generic MSR interface (with the help of the x86_energy_perf_policy tool), - * but there are two reasons for the kernel to touch it. + * processor. Generally, the EPB is expected to be set by user space (directly + * via sysfs or with the help of the x86_energy_perf_policy tool), but there are + * two reasons for the kernel to update it. * * First, there are systems where the platform firmware resets the EPB during * system-wide transitions from sleep states back into the working state @@ -52,6 +56,7 @@ static DEFINE_PER_CPU(u8, saved_epb); #define EPB_MASK 0x0fULL #define EPB_SAVED 0x10ULL +#define MAX_EPB EPB_MASK static int intel_epb_save(void) { @@ -97,15 +102,95 @@ static struct syscore_ops intel_epb_syscore_ops = { .resume = intel_epb_restore, }; +static const char * const energy_perf_strings[] = { + "performance", + "balance-performance", + "normal", + "balance-power", + "power" +}; +static const u8 energ_perf_values[] = { + ENERGY_PERF_BIAS_PERFORMANCE, + ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, + ENERGY_PERF_BIAS_NORMAL, + ENERGY_PERF_BIAS_BALANCE_POWERSAVE, + ENERGY_PERF_BIAS_POWERSAVE +}; + +static ssize_t energy_perf_bias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned int cpu = dev->id; + u64 epb; + int ret; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); 
+ if (ret < 0) + return ret; + + return sprintf(buf, "%llu\n", epb); +} + +static ssize_t energy_perf_bias_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int cpu = dev->id; + u64 epb, val; + int ret; + + ret = __sysfs_match_string(energy_perf_strings, + ARRAY_SIZE(energy_perf_strings), buf); + if (ret >= 0) + val = energ_perf_values[ret]; + else if (kstrtou64(buf, 0, &val) || val > MAX_EPB) + return -EINVAL; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + if (ret < 0) + return ret; + + ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, + (epb & ~EPB_MASK) | val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR_RW(energy_perf_bias); + +static struct attribute *intel_epb_attrs[] = { + &dev_attr_energy_perf_bias.attr, + NULL +}; + +static const struct attribute_group intel_epb_attr_group = { + .name = power_group_name, + .attrs = intel_epb_attrs +}; + static int intel_epb_online(unsigned int cpu) { + struct device *cpu_dev = get_cpu_device(cpu); + intel_epb_restore(); + if (!cpuhp_tasks_frozen) + sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group); + return 0; } static int intel_epb_offline(unsigned int cpu) { - return intel_epb_save(); + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpuhp_tasks_frozen) + sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group); + + intel_epb_save(); + return 0; } static __init int intel_epb_init(void) From 9eca544b1491df90ea7102a7ed14acc3c562d97b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Mar 2019 11:33:21 +0100 Subject: [PATCH 09/72] cpufreq: schedutil: Simplify iowait boosting There is no reason for the minimum iowait boost value in the schedutil cpufreq governor to depend on the available range of CPU frequencies. 
In fact, that dependency is generally confusing, because it causes the iowait boost to behave somewhat differently on CPUs with the same maximum frequency and different minimum frequencies, for example. For this reason, replace the min field in struct sugov_cpu with a constant and choose its values to be 1/8 of SCHED_CAPACITY_SCALE (for consistency with the intel_pstate driver's internal governor). [Note that policy->cpuinfo.max_freq will not be a constant any more after a subsequent change, so this change is depended on by it.] Link: https://lore.kernel.org/lkml/20190305083202.GU32494@hirez.programming.kicks-ass.net/T/#ee20bdc98b7d89f6110c0d00e5c3ee8c2ced93c3d Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar --- kernel/sched/cpufreq_schedutil.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 5c41ea367422..b3a878aa593d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,8 @@ #include #include +#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8) + struct sugov_tunables { struct gov_attr_set attr_set; unsigned int rate_limit_us; @@ -51,7 +53,6 @@ struct sugov_cpu { u64 last_update; unsigned long bw_dl; - unsigned long min; unsigned long max; /* The field below is for single-CPU policies only: */ @@ -291,8 +292,8 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) * * The IO wait boost of a task is disabled after a tick since the last update * of a CPU. If a new IO wait boost is requested after more then a tick, then - * we enable the boost starting from the minimum frequency, which improves - * energy efficiency by ignoring sporadic wakeups from IO. + * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy + * efficiency by ignoring sporadic wakeups from IO. 
*/ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, bool set_iowait_boost) @@ -303,7 +304,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, if (delta_ns <= TICK_NSEC) return false; - sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0; + sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0; sg_cpu->iowait_boost_pending = set_iowait_boost; return true; @@ -317,8 +318,9 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, * * Each time a task wakes up after an IO operation, the CPU utilization can be * boosted to a certain utilization which doubles at each "frequent and - * successive" wakeup from IO, ranging from the utilization of the minimum - * OPP to the utilization of the maximum OPP. + * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization + * of the maximum OPP. + * * To keep doubling, an IO boost has to be requested at least once per tick, * otherwise we restart from the utilization of the minimum OPP. */ @@ -349,7 +351,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, } /* First wakeup after IO: start with minimum boost */ - sg_cpu->iowait_boost = sg_cpu->min; + sg_cpu->iowait_boost = IOWAIT_BOOST_MIN; } /** @@ -389,7 +391,7 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, * No boost pending; reduce the boost value. */ sg_cpu->iowait_boost >>= 1; - if (sg_cpu->iowait_boost < sg_cpu->min) { + if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) { sg_cpu->iowait_boost = 0; return util; } @@ -826,9 +828,6 @@ static int sugov_start(struct cpufreq_policy *policy) memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; - sg_cpu->min = - (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) / - policy->cpuinfo.max_freq; } for_each_cpu(cpu, policy->cpus) { From 9083e4986124389e2a7c0ffca95630a4983887f0 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 26 Mar 2019 12:19:52 +0100 Subject: [PATCH 10/72] cpufreq: intel_pstate: Update max frequency on global turbo changes While the cpuinfo.max_freq value doesn't really matter for intel_pstate in the active mode, in the passive mode it is used by governors as the maximum physical frequency of the CPU and the results of governor computations generally depend on it. Also it is made available to user space via sysfs and it should match the current HW configuration. For this reason, make intel_pstate update cpuinfo.max_freq for all CPUs if it detects a global change of turbo frequency settings from "disable" to "enable" or the other way associated with a _PPC change notification from the platform firmware. Note that policy_is_inactive(), cpufreq_cpu_acquire(), cpufreq_cpu_release(), and cpufreq_set_policy() need to be made available to it for this purpose. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200759 Reported-by: Gabriele Mazzotta Tested-by: Gabriele Mazzotta Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 16 ++++------------ drivers/cpufreq/intel_pstate.c | 35 ++++++++++++++++++++++++++++------ include/linux/cpufreq.h | 10 ++++++++++ 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d8fc395af773..f3f79266ab48 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -34,11 +34,6 @@ static LIST_HEAD(cpufreq_policy_list); -static inline bool policy_is_inactive(struct cpufreq_policy *policy) -{ - return cpumask_empty(policy->cpus); -} - /* Macros to iterate over CPU policies */ #define for_each_suitable_policy(__policy, __active) \ list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \ @@ -254,7 +249,7 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_put); * cpufreq_cpu_release - Unlock a policy and decrement its usage counter. * @policy: cpufreq policy returned by cpufreq_cpu_acquire(). 
*/ -static void cpufreq_cpu_release(struct cpufreq_policy *policy) +void cpufreq_cpu_release(struct cpufreq_policy *policy) { if (WARN_ON(!policy)) return; @@ -278,7 +273,7 @@ static void cpufreq_cpu_release(struct cpufreq_policy *policy) * cpufreq_cpu_release() in order to release its rwsem and balance its usage * counter properly. */ -static struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); @@ -714,9 +709,6 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) return ret; } -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy); - /** * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access */ @@ -2274,8 +2266,8 @@ EXPORT_SYMBOL(cpufreq_get_policy); * * The cpuinfo part of @policy is not updated by this function. */ -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy) +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy) { struct cpufreq_governor *old_gov; int ret; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e2191a570ade..08d1a1e845aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,7 +179,7 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. - * @turbo_disabled_s: Saved @turbo_disabled value. + * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. 
* @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -188,7 +188,7 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; - bool turbo_disabled_s; + bool turbo_disabled_mf; int max_perf_pct; int min_perf_pct; }; @@ -899,6 +899,28 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void intel_pstate_update_max_freq(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpufreq_policy new_policy; + struct cpudata *cpudata; + + if (!policy) + return; + + cpudata = all_cpu_data[cpu]; + policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + + memcpy(&new_policy, policy, sizeof(*policy)); + new_policy.max = min(policy->user_policy.max, policy->cpuinfo.max_freq); + new_policy.min = min(policy->user_policy.min, new_policy.max); + + cpufreq_set_policy(policy, &new_policy); + + cpufreq_cpu_release(policy); +} + static void intel_pstate_update_limits(unsigned int cpu) { mutex_lock(&intel_pstate_driver_lock); @@ -908,9 +930,10 @@ static void intel_pstate_update_limits(unsigned int cpu) * If turbo has been turned on or off globally, policy limits for * all CPUs need to be updated to reflect that. 
*/ - if (global.turbo_disabled_s != global.turbo_disabled) { - global.turbo_disabled_s = global.turbo_disabled; - intel_pstate_update_policies(); + if (global.turbo_disabled_mf != global.turbo_disabled) { + global.turbo_disabled_mf = global.turbo_disabled; + for_each_possible_cpu(cpu) + intel_pstate_update_max_freq(cpu); } else { cpufreq_update_policy(cpu); } @@ -2159,7 +2182,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); - global.turbo_disabled_s = global.turbo_disabled; + global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5005ea40364f..684caf067003 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -178,6 +178,11 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } #endif +static inline bool policy_is_inactive(struct cpufreq_policy *policy) +{ + return cpumask_empty(policy->cpus); +} + static inline bool policy_is_shared(struct cpufreq_policy *policy) { return cpumask_weight(policy->cpus) > 1; @@ -193,7 +198,12 @@ unsigned int cpufreq_quick_get_max(unsigned int cpu); void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); + +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); +void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); From 
108ec36b699475001f5af81ff7db624427d14dbe Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 30 Mar 2019 12:20:22 +0100 Subject: [PATCH 11/72] drivers/cpufreq: Convert some slow-path static_cpu_has() callers to boot_cpu_has() Using static_cpu_has() is pointless on those paths, convert them to the boot_cpu_has() variant. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- drivers/cpufreq/intel_pstate.c | 18 +++++++++--------- drivers/cpufreq/powernow-k8.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 4ac7c3cf34be..6927a8c0e748 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -124,7 +124,7 @@ static int __init amd_freq_sensitivity_init(void) PCI_DEVICE_ID_AMD_KERNCZ_SMBUS, NULL); if (!pcidev) { - if (!static_cpu_has(X86_FEATURE_PROC_FEEDBACK)) + if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK)) return -ENODEV; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 08d1a1e845aa..840500b457c6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -527,7 +527,7 @@ static s16 intel_pstate_get_epb(struct cpudata *cpu_data) u64 epb; int ret; - if (!static_cpu_has(X86_FEATURE_EPB)) + if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); @@ -541,7 +541,7 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data) { s16 epp; - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { /* * When hwp_req_data is 0, means that caller didn't read * MSR_HWP_REQUEST, so need to read and get EPP. 
@@ -566,7 +566,7 @@ static int intel_pstate_set_epb(int cpu, s16 pref) u64 epb; int ret; - if (!static_cpu_has(X86_FEATURE_EPB)) + if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); @@ -614,7 +614,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) if (epp < 0) return epp; - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { if (epp == HWP_EPP_PERFORMANCE) return 1; if (epp <= HWP_EPP_BALANCE_PERFORMANCE) @@ -623,7 +623,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) return 3; else return 4; - } else if (static_cpu_has(X86_FEATURE_EPB)) { + } else if (boot_cpu_has(X86_FEATURE_EPB)) { /* * Range: * 0x00-0x03 : Performance @@ -651,7 +651,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, mutex_lock(&intel_pstate_limits_lock); - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { u64 value; ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value); @@ -826,7 +826,7 @@ static void intel_pstate_hwp_set(unsigned int cpu) epp = cpu_data->epp_powersave; } update_epp: - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { value &= ~GENMASK_ULL(31, 24); value |= (u64)epp << 24; } else { @@ -851,7 +851,7 @@ static void intel_pstate_hwp_force_min_perf(int cpu) value |= HWP_MIN_PERF(min_perf); /* Set EPP/EPB to min */ - if (static_cpu_has(X86_FEATURE_HWP_EPP)) + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); else intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE); @@ -1241,7 +1241,7 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt as we don't process them */ - if (static_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) wrmsrl_on_cpu(cpudata->cpu, 
MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index fb77b39a4ce3..3c12e03fa343 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1178,7 +1178,7 @@ static int powernowk8_init(void) unsigned int i, supported_cpus = 0; int ret; - if (static_cpu_has(X86_FEATURE_HW_PSTATE)) { + if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) { __request_acpi_cpufreq(); return -ENODEV; } From b623fa320f8360f049a6f3c3ccc487cb85af4c5b Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:48 +0800 Subject: [PATCH 12/72] cpufreq: ap806: fix possible object reference leak The call to of_find_compatible_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/armada-8k-cpufreq.c:187:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 130, but without a corresponding object release within this function. ./drivers/cpufreq/armada-8k-cpufreq.c:191:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 130, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Jason Cooper Cc: Andrew Lunn Cc: Gregory Clement Cc: Sebastian Hesselbarth Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: linux-arm-kernel@lists.infradead.org Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-8k-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index b3f4bd647e9b..988ebc326bdb 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -132,6 +132,7 @@ static int __init armada_8k_cpufreq_init(void) of_node_put(node); return -ENODEV; } + of_node_put(node); nb_cpus = num_possible_cpus(); freq_tables = kcalloc(nb_cpus, sizeof(*freq_tables), GFP_KERNEL); From ddb64c5db3cc8fb9c1242214d5798b2c2865681c Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:49 +0800 Subject: [PATCH 13/72] cpufreq: imx6q: fix possible object reference leak The call to of_node_get returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/imx6q-cpufreq.c:391:4-10: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 348, but without a corresponding object release within this function. ./drivers/cpufreq/imx6q-cpufreq.c:395:3-9: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 348, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: linux-pm@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx6q-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index a4ff09f91c8f..3e17560b1efe 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -388,11 +388,11 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) ret = imx6ul_opp_check_speed_grading(cpu_dev); if (ret) { if (ret == -EPROBE_DEFER) - return ret; + goto put_node; dev_err(cpu_dev, "failed to read ocotp: %d\n", ret); - return ret; + goto put_node; } } else { imx6q_opp_check_speed_grading(cpu_dev); From 7c468966f05ac9c17bb5948275283d34e6fe0660 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:50 +0800 Subject: [PATCH 14/72] cpufreq: kirkwood: fix possible object reference leak The call to of_get_child_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/kirkwood-cpufreq.c:127:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 118, but without a corresponding object release within this function. ./drivers/cpufreq/kirkwood-cpufreq.c:133:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 118, but without a corresponding object release within this function. and also do some cleanup: - of_node_put(np); - np = NULL; ... of_node_put(np); Signed-off-by: Wen Yang Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/kirkwood-cpufreq.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index c2dd43f3f5d8..8d63a6dc8383 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -124,13 +124,14 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); if (IS_ERR(priv.cpu_clk)) { dev_err(priv.dev, "Unable to get cpuclk\n"); - return PTR_ERR(priv.cpu_clk); + err = PTR_ERR(priv.cpu_clk); + goto out_node; } err = clk_prepare_enable(priv.cpu_clk); if (err) { dev_err(priv.dev, "Unable to prepare cpuclk\n"); - return err; + goto out_node; } kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; @@ -161,20 +162,22 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) goto out_ddr; } - of_node_put(np); - np = NULL; - err = cpufreq_register_driver(&kirkwood_cpufreq_driver); - if (!err) - return 0; + if (err) { + dev_err(priv.dev, "Failed to register cpufreq driver\n"); + goto out_powersave; + } - dev_err(priv.dev, "Failed to register cpufreq driver\n"); + of_node_put(np); + return 0; +out_powersave: clk_disable_unprepare(priv.powersave_clk); out_ddr: clk_disable_unprepare(priv.ddr_clk); out_cpu: clk_disable_unprepare(priv.cpu_clk); +out_node: of_node_put(np); return err; From ddb07fba1c645791ead16d1eee0639a033fb8cf9 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:51 +0800 Subject: [PATCH 15/72] cpufreq: maple: fix possible object reference leak The call to of_cpu_device_node_get returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. 
Detected by coccinelle with the following warnings: ./drivers/cpufreq/maple-cpufreq.c:213:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 177, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/maple-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index d9df89392b84..a05f1342ec02 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -210,7 +210,7 @@ static int __init maple_cpufreq_init(void) */ valp = of_get_property(cpunode, "clock-frequency", NULL); if (!valp) - return -ENODEV; + goto bail_noprops; max_freq = (*valp)/1000; maple_cpu_freqs[0].frequency = max_freq; maple_cpu_freqs[1].frequency = max_freq/2; From a9acc26b75f652f697e02a9febe2ab0da648a571 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:52 +0800 Subject: [PATCH 16/72] cpufreq/pasemi: fix possible object reference leak The call to of_get_cpu_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/pasemi-cpufreq.c:212:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 147, but without a corresponding object release within this function. ./drivers/cpufreq/pasemi-cpufreq.c:220:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 147, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/pasemi-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 75dfbd2a58ea..c7710c149de8 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -146,6 +146,7 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu = of_get_cpu_node(policy->cpu, NULL); + of_node_put(cpu); if (!cpu) goto out; From 8d10dc28a9ea6e8c02e825dab28699f3c72b02d9 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:53 +0800 Subject: [PATCH 17/72] cpufreq: pmac32: fix possible object reference leak The call to of_find_node_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/pmac32-cpufreq.c:557:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 552, but without a corresponding object release within this function. ./drivers/cpufreq/pmac32-cpufreq.c:569:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 552, but without a corresponding object release within this function. ./drivers/cpufreq/pmac32-cpufreq.c:598:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 587, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linux-pm@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/pmac32-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 52f0d91d30c1..9b4ce2eb8222 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -552,6 +552,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); if (!voltage_gpio){ pr_err("missing cpu-vcore-select gpio\n"); return 1; @@ -588,6 +589,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode) if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); pvr = mfspr(SPRN_PVR); has_cpu_l2lve = !((pvr & 0xf00) == 0x100); From 233298032803f2802fe99892d0de4ab653bfece4 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:54 +0800 Subject: [PATCH 18/72] cpufreq: ppc_cbe: fix possible object reference leak The call to of_get_cpu_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/ppc_cbe_cpufreq.c:89:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 76, but without a corresponding object release within this function. ./drivers/cpufreq/ppc_cbe_cpufreq.c:89:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 76, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/ppc_cbe_cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index 41a0f0be3f9f..8414c3a4ea08 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -86,6 +86,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!cbe_get_cpu_pmd_regs(policy->cpu) || !cbe_get_cpu_mic_tm_regs(policy->cpu)) { pr_info("invalid CBE regs pointers for cpufreq\n"); + of_node_put(cpu); return -EINVAL; } From c1d1090c3c7674c965552e22a65b29423aa4f090 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 8 Apr 2019 15:46:10 +0530 Subject: [PATCH 19/72] cpufreq: maple: Remove redundant code from maple_cpufreq_init() The success path and error path both look the same, don't duplicate the code. Signed-off-by: Viresh Kumar Acked-by: Rafael J. Wysocki --- drivers/cpufreq/maple-cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index a05f1342ec02..a94355723ef8 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -231,10 +231,6 @@ static int __init maple_cpufreq_init(void) rc = cpufreq_register_driver(&maple_cpufreq_driver); - of_node_put(cpunode); - - return rc; - bail_noprops: of_node_put(cpunode); From 1120b0f9850cb01fffcb5f4379a69c8ab7a6658f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:03:30 +0200 Subject: [PATCH 20/72] cpufreq: intel_pstate: Documentation: Add references sections Add separate references sections to the cpufreq.rst and intel_pstate.rst documents under admin-guide/pm and list the references to external documentation in there. Update the ACPI specification URL while at it. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/cpufreq.rst | 10 +++++--- Documentation/admin-guide/pm/intel_pstate.rst | 25 ++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 7eca9026a9ed..b97ce64d5976 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -396,8 +396,8 @@ RT or deadline scheduling classes, the governor will increase the frequency to the allowed maximum (that is, the ``scaling_max_freq`` policy limit). In turn, if it is invoked by the CFS scheduling class, the governor will use the Per-Entity Load Tracking (PELT) metric for the root control group of the -given CPU as the CPU utilization estimate (see the `Per-entity load tracking`_ -LWN.net article for a description of the PELT mechanism). Then, the new +given CPU as the CPU utilization estimate (see the *Per-entity load tracking* +LWN.net article [1]_ for a description of the PELT mechanism). Then, the new CPU frequency to apply is computed in accordance with the formula f = 1.25 * ``f_0`` * ``util`` / ``max`` @@ -698,4 +698,8 @@ hardware feature (e.g. all Intel ones), even if the :c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option is set. -.. _Per-entity load tracking: https://lwn.net/Articles/531853/ +References +========== + +.. [1] Jonathan Corbet, *Per-entity load tracking*, + https://lwn.net/Articles/531853/ diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index ec0f7c111f65..6dba90b753d7 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -20,11 +20,10 @@ you have not done that yet.] 
For the processors supported by ``intel_pstate``, the P-state concept is broader than just an operating frequency or an operating performance point (see the -`LinuxCon Europe 2015 presentation by Kristen Accardi `_ for more +LinuxCon Europe 2015 presentation by Kristen Accardi [1]_ for more information about that). For this reason, the representation of P-states used by ``intel_pstate`` internally follows the hardware specification (for details -refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual -Volume 3: System Programming Guide `_). However, the ``CPUFreq`` core +refer to Intel Software Developer’s Manual [2]_). However, the ``CPUFreq`` core uses frequencies for identifying operating performance points of CPUs and frequencies are involved in the user space interface exposed by it, so ``intel_pstate`` maps its internal representation of P-states to frequencies too @@ -561,9 +560,9 @@ or to pin every task potentially sensitive to them to a specific CPU.] On the majority of systems supported by ``intel_pstate``, the ACPI tables provided by the platform firmware contain ``_PSS`` objects returning information -that can be used for CPU performance scaling (refer to the `ACPI specification`_ -for details on the ``_PSS`` objects and the format of the information returned -by them). +that can be used for CPU performance scaling (refer to the ACPI specification +[3]_ for details on the ``_PSS`` objects and the format of the information +returned by them). The information returned by the ACPI ``_PSS`` objects is used by the ``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate`` @@ -728,6 +727,14 @@ P-state is called, the ``ftrace`` filter can be set to to -0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func -.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf -.. 
_SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html -.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf +References +========== + +.. [1] Kristen Accardi, *Balancing Power and Performance in the Linux Kernel*, + http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf + +.. [2] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3: System Programming Guide*, + http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html + +.. [3] *Advanced Configuration and Power Interface Specification*, + https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf From fc7db767b16cf2af3be9e87a4b88e206d0e1a8b2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:04:43 +0200 Subject: [PATCH 21/72] Documentation: PM: Add SPDX license tags to multiple files Add SPDX license tags to .rst files under Documentation/driver-api/pm and Documentation/admin-guide/pm. Signed-off-by: Rafael J.
Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/cpufreq.rst | 2 ++ Documentation/admin-guide/pm/cpuidle.rst | 2 ++ Documentation/admin-guide/pm/index.rst | 2 ++ Documentation/admin-guide/pm/intel_pstate.rst | 2 ++ Documentation/admin-guide/pm/sleep-states.rst | 2 ++ Documentation/admin-guide/pm/strategies.rst | 2 ++ Documentation/admin-guide/pm/system-wide.rst | 2 ++ Documentation/admin-guide/pm/working-state.rst | 2 ++ Documentation/driver-api/pm/cpuidle.rst | 2 ++ Documentation/driver-api/pm/devices.rst | 2 ++ Documentation/driver-api/pm/index.rst | 2 ++ Documentation/driver-api/pm/notifiers.rst | 2 ++ Documentation/driver-api/pm/types.rst | 2 ++ 13 files changed, 26 insertions(+) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index b97ce64d5976..63e54ba1fe0c 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy ` .. |intel_pstate| replace:: :doc:`intel_pstate ` diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index 9c58b35a81cb..ab8fa0856095 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state ` .. |cpufreq| replace:: :doc:`CPU Performance Scaling ` diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst index 49237ac73442..39f8f9f81e7a 100644 --- a/Documentation/admin-guide/pm/index.rst +++ b/Documentation/admin-guide/pm/index.rst @@ -1,3 +1,5 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + ================ Power Management ================ diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 6dba90b753d7..5883c6c80315 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =============================================== ``intel_pstate`` CPU Performance Scaling Driver =============================================== diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst index dbf5acd49f35..6d3537ae9a94 100644 --- a/Documentation/admin-guide/pm/sleep-states.rst +++ b/Documentation/admin-guide/pm/sleep-states.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =================== System Sleep States =================== diff --git a/Documentation/admin-guide/pm/strategies.rst b/Documentation/admin-guide/pm/strategies.rst index afe4d3f831fe..863172633fca 100644 --- a/Documentation/admin-guide/pm/strategies.rst +++ b/Documentation/admin-guide/pm/strategies.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =========================== Power Management Strategies =========================== diff --git a/Documentation/admin-guide/pm/system-wide.rst b/Documentation/admin-guide/pm/system-wide.rst index 0c81e4c5de39..2b1f987b34f0 100644 --- a/Documentation/admin-guide/pm/system-wide.rst +++ b/Documentation/admin-guide/pm/system-wide.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ============================ System-Wide Power Management ============================ diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index b6cef9b5e961..e5fbf322f256 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -1,3 +1,5 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + ============================== Working-State Power Management ============================== diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst index 5842ab621a58..7e0d850e0113 100644 --- a/Documentation/driver-api/pm/cpuidle.rst +++ b/Documentation/driver-api/pm/cpuidle.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor ` .. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device ` .. |struct cpuidle_driver| replace:: :c:type:`struct cpuidle_driver ` diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 090c151aa86b..6885d72d85ca 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops ` .. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain ` .. |struct bus_type| replace:: :c:type:`struct bus_type ` diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst index 56975c6bc789..c2a9ef8d115c 100644 --- a/Documentation/driver-api/pm/index.rst +++ b/Documentation/driver-api/pm/index.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =============================== CPU and Device Power Management =============================== diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst index 62f860026992..69678a0d9cc0 100644 --- a/Documentation/driver-api/pm/notifiers.rst +++ b/Documentation/driver-api/pm/notifiers.rst @@ -1,3 +1,5 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + ============================= Suspend/Hibernation Notifiers ============================= diff --git a/Documentation/driver-api/pm/types.rst b/Documentation/driver-api/pm/types.rst index 3ebdecc54104..73a231caf764 100644 --- a/Documentation/driver-api/pm/types.rst +++ b/Documentation/driver-api/pm/types.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================================== Device Power Management Data Types ================================== From fc1860d6b17fa00d16df5e608eed0526e11ccad1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:06:15 +0200 Subject: [PATCH 22/72] Documentation: PM: Unify copyright notices Unify copyright notices in the .rst files under Documentation/driver-api/pm and Documentation/admin-guide/pm. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/cpufreq.rst | 6 ++++-- Documentation/admin-guide/pm/cpuidle.rst | 6 ++++-- Documentation/admin-guide/pm/intel_pstate.rst | 5 +++-- Documentation/admin-guide/pm/sleep-states.rst | 6 ++++-- Documentation/admin-guide/pm/strategies.rst | 6 ++++-- Documentation/driver-api/pm/cpuidle.rst | 5 +++-- Documentation/driver-api/pm/devices.rst | 10 ++++++---- Documentation/driver-api/pm/notifiers.rst | 6 ++++-- 8 files changed, 32 insertions(+), 18 deletions(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 63e54ba1fe0c..0c74a7784964 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy ` .. |intel_pstate| replace:: :doc:`intel_pstate ` @@ -7,9 +8,10 @@ CPU Performance Scaling ======================= -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2017 Intel Corp., Rafael J.
Wysocki The Concept of CPU Performance Scaling ====================================== diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index ab8fa0856095..e70b365dbc60 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state ` .. |cpufreq| replace:: :doc:`CPU Performance Scaling ` @@ -7,9 +8,10 @@ CPU Idle Time Management ======================== -:: +:Copyright: |copy| 2018 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2018 Intel Corp., Rafael J. Wysocki Concepts ======== diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 5883c6c80315..67e414e34f37 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -1,12 +1,13 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: =============================================== ``intel_pstate`` CPU Performance Scaling Driver =============================================== -:: +:Copyright: |copy| 2017 Intel Corporation - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki +:Author: Rafael J. Wysocki General Information diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst index 6d3537ae9a94..cd3a28cb81f4 100644 --- a/Documentation/admin-guide/pm/sleep-states.rst +++ b/Documentation/admin-guide/pm/sleep-states.rst @@ -1,12 +1,14 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: =================== System Sleep States =================== -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2017 Intel Corp., Rafael J. 
Wysocki Sleep states are global low-power states of the entire system in which user space code cannot be executed and the overall system activity is significantly diff --git a/Documentation/admin-guide/pm/strategies.rst b/Documentation/admin-guide/pm/strategies.rst index 863172633fca..dd0362e32fa5 100644 --- a/Documentation/admin-guide/pm/strategies.rst +++ b/Documentation/admin-guide/pm/strategies.rst @@ -1,12 +1,14 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: =========================== Power Management Strategies =========================== -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki The Linux kernel supports two major high-level power management strategies. diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst index 7e0d850e0113..006cf6db40c6 100644 --- a/Documentation/driver-api/pm/cpuidle.rst +++ b/Documentation/driver-api/pm/cpuidle.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor ` .. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device ` @@ -9,9 +10,9 @@ CPU Idle Time Management ======================== -:: +:Copyright: |copy| 2019 Intel Corporation - Copyright (c) 2019 Intel Corp., Rafael J. Wysocki +:Author: Rafael J. Wysocki CPU Idle Time Management Subsystem diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 6885d72d85ca..30835683616a 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops ` .. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain ` @@ -14,11 +15,12 @@ Device Power Management Basics ============================== -:: +:Copyright: |copy| 2010-2011 Rafael J. Wysocki , Novell Inc. 
+:Copyright: |copy| 2010 Alan Stern +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2010-2011 Rafael J. Wysocki , Novell Inc. - Copyright (c) 2010 Alan Stern - Copyright (c) 2016 Intel Corp., Rafael J. Wysocki Most of the code in Linux is device drivers, so most of the Linux power management (PM) code is also driver-specific. Most drivers will do very diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst index 69678a0d9cc0..186435c43b77 100644 --- a/Documentation/driver-api/pm/notifiers.rst +++ b/Documentation/driver-api/pm/notifiers.rst @@ -1,12 +1,14 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: ============================= Suspend/Hibernation Notifiers ============================= -:: +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2016 Intel Corp., Rafael J. Wysocki There are some operations that subsystems or drivers may want to carry out before hibernation/suspend or after restore/resume, but they require the system From 7973b799dbea1770742851487a98276a24c961a5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:08:18 +0200 Subject: [PATCH 23/72] admin-guide: pm: intel_epb: Add SPDX license tag and copyright notice Add an SPDX license tag and a copyright notice to the intel_epb.rst file under Documentation/admin-guide/pm. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/intel_epb.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst index d100849edfc4..005121167af7 100644 --- a/Documentation/admin-guide/pm/intel_epb.rst +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -1,7 +1,15 @@ +.. SPDX-License-Identifier: GPL-2.0 +..
include:: + ====================================== Intel Performance and Energy Bias Hint ====================================== +:Copyright: |copy| 2019 Intel Corporation + +:Author: Rafael J. Wysocki + + .. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c :doc: overview From 89f98d7e5fa8f7e1ba627537dc8a3cef5add7b04 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 9 Apr 2019 10:25:36 +0800 Subject: [PATCH 24/72] cpufreq: Remove cpufreq_driver check in cpufreq_boost_supported() Currently there are three calling paths for cpufreq_boost_supported() in all as below, we can see the cpufreq_driver null check is needless since it is already checked before. cpufreq_enable_boost_support() |-> if (!cpufreq_driver) |-> cpufreq_boost_supported() cpufreq_register_driver() |-> if (!driver_data ... |-> cpufreq_driver = driver_data |-> cpufreq_boost_supported() |-> remove_boost_sysfs_file() |-> cpufreq_boost_supported() cpufreq_unregister_driver() |-> if (!cpufreq_driver ... |-> remove_boost_sysfs_file() |-> cpufreq_boost_supported() Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f3f79266ab48..3f235d5f67ee 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2472,7 +2472,7 @@ int cpufreq_boost_trigger_state(int state) static bool cpufreq_boost_supported(void) { - return likely(cpufreq_driver) && cpufreq_driver->set_boost; + return cpufreq_driver->set_boost; } static int create_boost_sysfs_file(void) From fcccc5c838c1999785a5aeb9e3bdcd00957f3e15 Mon Sep 17 00:00:00 2001 From: Kyle Lin Date: Tue, 9 Apr 2019 16:43:04 +0800 Subject: [PATCH 25/72] cpufreq: stats: Use lock by stat to replace global spin lock Stats is updated by each policy, using the lock by stat can reduce the contention. Signed-off-by: Kyle Lin Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_stats.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index e2db5581489a..08b192eb22c6 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -14,7 +14,6 @@ #include #include -static DEFINE_SPINLOCK(cpufreq_stats_lock); struct cpufreq_stats { unsigned int total_trans; @@ -23,6 +22,7 @@ struct cpufreq_stats { unsigned int state_num; unsigned int last_index; u64 *time_in_state; + spinlock_t lock; unsigned int *freq_table; unsigned int *trans_table; }; @@ -39,12 +39,12 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats) { unsigned int count = stats->max_state; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); memset(stats->time_in_state, 0, count * sizeof(u64)); memset(stats->trans_table, 0, count * count * sizeof(int)); stats->last_time = get_jiffies_64(); stats->total_trans = 0; - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); } static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) @@ -62,9 +62,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) if (policy->fast_switch_enabled) return 0; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); cpufreq_stats_update(stats); - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); for (i = 0; i < stats->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], @@ -211,6 +211,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) stats->state_num = i; stats->last_time = get_jiffies_64(); stats->last_index = freq_table_get_index(stats, policy->cur); + spin_lock_init(&stats->lock); policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); @@ -242,11 +243,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, if (old_index == -1 || new_index == -1 || old_index == new_index) 
return; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); cpufreq_stats_update(stats); stats->last_index = new_index; stats->trans_table[old_index * stats->max_state + new_index]++; stats->total_trans++; - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); } From c4a586fdd440931c2773a9da42c1fc564068f128 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 15 Mar 2019 23:37:51 -0400 Subject: [PATCH 26/72] PM / core: fix kerneldoc comment for dpm_watchdog_handler() This brings the kernel doc in line with the function signature. Signed-off-by: Yangtao Li Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f80d298de3fa..497238704abd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -478,7 +478,7 @@ struct dpm_watchdog { /** * dpm_watchdog_handler - Driver suspend / resume watchdog handler. - * @data: Watchdog object address. + * @t: The timer that PM watchdog depends on. * * Called when a driver has timed out suspending or resuming. * There's not much we can do here to recover so panic() to From 0b237cb2fc7b10b2c4e92477760fa7442847c804 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 15 Mar 2019 23:48:41 -0400 Subject: [PATCH 27/72] PM / core: fix kerneldoc comment for device_pm_wait_for_dev() Rearrange comment to make the comment style consistent, the previous function parameters are described first. Signed-off-by: Yangtao Li Acked-by: Pavel Machek Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 497238704abd..41eba82ee7b9 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -2069,8 +2069,8 @@ EXPORT_SYMBOL_GPL(__suspend_report_result); /** * device_pm_wait_for_dev - Wait for suspend/resume of a device to complete. - * @dev: Device to wait for. * @subordinate: Device that needs to wait for @dev. + * @dev: Device to wait for. */ int device_pm_wait_for_dev(struct device *subordinate, struct device *dev) { From 49a27e279052cf71ba931a26b00194ff46510480 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:45 +0100 Subject: [PATCH 28/72] PM / Domains: Add generic data pointer to struct genpd_power_state Add a data pointer to the genpd_power_state struct, to allow a genpd backend driver to store per-state specific data. To introduce the pointer, change the way genpd deals with freeing of the corresponding allocated data. More precisely, clarify the responsibility of whom that shall free the data, by adding a ->free_states() callback to the generic_pm_domain structure. The one allocating the data will be expected to set the callback, to allow genpd to invoke it from genpd_remove(). Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 12 ++++++++++-- include/linux/pm_domain.h | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 96a6dc9d305c..ff6f992f7a1d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1686,6 +1686,12 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static void genpd_free_default_power_state(struct genpd_power_state *states, + unsigned int state_count) +{ + kfree(states); +} + static int genpd_set_default_power_state(struct generic_pm_domain *genpd) { struct genpd_power_state *state; @@ -1696,7 +1702,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) genpd->states = state; genpd->state_count = 1; - genpd->free = state; + genpd->free_states = genpd_free_default_power_state; return 0; } @@ -1812,7 +1818,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); - kfree(genpd->free); + if (genpd->free_states) + genpd->free_states(genpd->states, genpd->state_count); + pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ed5874bcee0..8e1399231753 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -69,6 +69,7 @@ struct genpd_power_state { s64 residency_ns; struct fwnode_handle *fwnode; ktime_t idle_time; + void *data; }; struct genpd_lock_ops; @@ -110,9 +111,10 @@ struct generic_pm_domain { struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ struct genpd_power_state *states; + void (*free_states)(struct genpd_power_state *states, + unsigned int state_count); unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ - void *free; /* Free the state that was allocated for default */ ktime_t on_time; ktime_t 
accounting_time; const struct genpd_lock_ops *lock_ops; From eb594b7325f61835555140922a4cb715264a325c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:46 +0100 Subject: [PATCH 29/72] PM / Domains: Add support for CPU devices to genpd To enable a CPU device to be attached to a PM domain managed by genpd, make a few changes to it for convenience. To be able to quickly find out what CPUs are attached to a genpd, which typically becomes useful from a genpd governor as subsequent changes are about to show, add a cpumask to struct generic_pm_domain to be updated when a CPU device gets attached to the genpd containing that cpumask. Also, propagate the cpumask changes upwards in the domain hierarchy to the master PM domains. This way, the cpumask for a genpd hierarchically reflects all CPUs attached to the topology below it. Finally, make this an opt-in feature, to avoid having to manage CPUs and the cpumask for a genpd that don't need it. To that end, add a new genpd configuration bit, GENPD_FLAG_CPU_DOMAIN. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 65 ++++++++++++++++++++++++++++++++++++- include/linux/pm_domain.h | 13 ++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ff6f992f7a1d..ecac03dcc9b2 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "power.h" @@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) +#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -1454,6 +1456,56 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } +static void __genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) +{ + struct gpd_link *link; + + if (!genpd_is_cpu_domain(genpd)) + return; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + struct generic_pm_domain *master = link->master; + + genpd_lock_nested(master, depth + 1); + __genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_unlock(master); + } + + if (set) + cpumask_set_cpu(cpu, genpd->cpus); + else + cpumask_clear_cpu(cpu, genpd->cpus); +} + +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + struct device *dev, bool set) +{ + int cpu; + + if (!genpd_is_cpu_domain(genpd)) + return; + + for_each_possible_cpu(cpu) { + if (get_cpu_device(cpu) == dev) { + __genpd_update_cpumask(genpd, cpu, set, 0); + return; + } + } +} + +static void genpd_set_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, true); +} + +static void 
genpd_clear_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, false); +} + static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) { @@ -1475,6 +1527,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, genpd_lock(genpd); + genpd_set_cpumask(genpd, dev); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1532,6 +1585,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; + genpd_clear_cpumask(genpd, dev); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -1768,11 +1822,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) return -EINVAL; + if (genpd_is_cpu_domain(genpd) && + !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) + return -ENOMEM; + /* Use only one "off" state if there were no states declared */ if (genpd->state_count == 0) { ret = genpd_set_default_power_state(genpd); - if (ret) + if (ret) { + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); return ret; + } } else if (!gov && genpd->state_count > 1) { pr_warn("%s: no governor for states\n", genpd->name); } @@ -1818,6 +1879,8 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); if (genpd->free_states) genpd->free_states(genpd->states, genpd->state_count); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 8e1399231753..a6e251fe9deb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,6 +16,7 @@ #include #include #include +#include /* * Flags to control the behaviour of a genpd. 
@@ -42,11 +43,22 @@ * GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered * on, in case any of its attached devices is used * in the wakeup path to serve system wakeups. + * + * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get + * devices attached, which may belong to CPUs or + * possibly have subdomains with CPUs attached. + * This flag enables the genpd backend driver to + * deploy idle power management support for CPUs + * and groups of CPUs. Note that, the backend + * driver must then comply with the so called, + * last-man-standing algorithm, for the CPUs in the + * PM domain. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) +#define GENPD_FLAG_CPU_DOMAIN (1U << 4) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -94,6 +106,7 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ + cpumask_var_t cpus; /* A cpumask of the attached CPUs */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct opp_table *opp_table; /* OPP table of the genpd */ From 6f9b83ac877fb5558d76b9f78590f3afd1bdf421 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:47 +0100 Subject: [PATCH 30/72] cpuidle: Export the next timer expiration for CPUs To be able to predict the sleep duration for a CPU entering idle, it is essential to know the expiration time of the next timer. Both the teo and the menu cpuidle governors already use this information for CPU idle state selection. Moving forward, a similar prediction needs to be made for a group of idle CPUs rather than for a single one and the following changes implement a new genpd governor for that purpose. 
In order to support that feature, add a new function called tick_nohz_get_next_hrtimer() that will return the next hrtimer expiration time of a given CPU to be invoked after deciding whether or not to stop the scheduler tick on that CPU. Make the cpuidle core call tick_nohz_get_next_hrtimer() right before invoking the ->enter() callback provided by the cpuidle driver for the given state and store its return value in the per-CPU struct cpuidle_device, so as to make it available to code outside of cpuidle. Note that at the point when cpuidle calls tick_nohz_get_next_hrtimer(), the governor's ->select() callback has already returned and indicated whether or not the tick should be stopped, so in fact the value returned by tick_nohz_get_next_hrtimer() always is the next hrtimer expiration time for the given CPU, possibly including the tick (if it hasn't been stopped). Co-developed-by: Lina Iyer Co-developed-by: Daniel Lezcano Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 19 +++++++++++++++++-- include/linux/cpuidle.h | 1 + include/linux/tick.h | 7 ++++++- kernel/time/tick-sched.c | 12 ++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f108309e871..0f4b7c45df3e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { + int ret = 0; + + /* + * Store the next hrtimer, which becomes either next tick or the next + * timer event, whatever expires first. Additionally, to make this data + * useful for consumers outside cpuidle, we rely on that the governor's + * ->select() callback have decided, whether to stop the tick or not. 
+ */ + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); + if (cpuidle_state_is_coupled(drv, index)) - return cpuidle_enter_state_coupled(dev, drv, index); - return cpuidle_enter_state(dev, drv, index); + ret = cpuidle_enter_state_coupled(dev, drv, index); + else + ret = cpuidle_enter_state(dev, drv, index); + + WRITE_ONCE(dev->next_hrtimer, 0); + return ret; } /** @@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); dev->last_residency = 0; + dev->next_hrtimer = 0; } /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3b39472324a3..bb9a0db89f1a 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -83,6 +83,7 @@ struct cpuidle_device { unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; + ktime_t next_hrtimer; int last_residency; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; diff --git a/include/linux/tick.h b/include/linux/tick.h index 55388ab45fd4..8891b5ac3e40 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,6 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -145,7 +146,11 @@ static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } - +static inline ktime_t tick_nohz_get_next_hrtimer(void) +{ + /* Next wake up is the tick period, assume it starts now */ + return ktime_add(ktime_get(), TICK_NSEC); +} static inline ktime_t tick_nohz_get_sleep_length(ktime_t 
*delta_next) { *delta_next = TICK_NSEC; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0b..8d18e03124ff 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1022,6 +1022,18 @@ bool tick_nohz_idle_got_tick(void) return false; } +/** + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer + * or the tick, whatever that expires first. Note that, if the tick has been + * stopped, it returns the next hrtimer. + * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_next_hrtimer(void) +{ + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; +} + /** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped From 2f36bde0fc8f1ab79d54bd2caa7c1cf874fd2206 Mon Sep 17 00:00:00 2001 From: "Andrew-sh.Cheng" Date: Fri, 29 Mar 2019 14:46:10 +0800 Subject: [PATCH 31/72] OPP: Introduce dev_pm_opp_find_freq_ceil_by_volt() This patch introduces a new helper routine in the OPP core, which returns the OPP with the highest frequency which has voltage less than or equal to the target voltage passed to the helper. Signed-off-by: Andrew-sh.Cheng [ Viresh: Massaged the commit log and renamed the helper with some cleanups. ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 54 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 8 +++++++ 2 files changed, 62 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0420f7e8ad5b..0e7703fe733f 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -526,6 +526,60 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); +/** + * dev_pm_opp_find_freq_ceil_by_volt() - Find OPP with highest frequency for + * target voltage. + * @dev: Device for which we do this operation. + * @u_volt: Target voltage. 
+ * + * Search for OPP with highest (ceil) frequency and has voltage <= u_volt. + * + * Return: matching *opp, else returns ERR_PTR in case of error which should be + * handled using IS_ERR. + * + * Error return values can be: + * EINVAL: bad parameters + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + if (!dev || !u_volt) { + dev_err(dev, "%s: Invalid argument volt=%lu\n", __func__, + u_volt); + return ERR_PTR(-EINVAL); + } + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available) { + if (temp_opp->supplies[0].u_volt > u_volt) + break; + opp = temp_opp; + } + } + + /* Increment the reference count of OPP */ + if (!IS_ERR(opp)) + dev_pm_opp_get(opp); + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_by_volt); + static int _set_opp_voltage(struct device *dev, struct regulator *reg, struct dev_pm_opp_supply *supply) { diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 24c757a32a7b..b150fe97ce5a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -102,6 +102,8 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt); struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); @@ -207,6 +209,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-ENOTSUPP); } +static inline struct dev_pm_opp 
*dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + return ERR_PTR(-ENOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { From 9df1e2c60d2b90024d5c2544f07ac9f539524322 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Wed, 10 Apr 2019 11:59:52 +0800 Subject: [PATCH 32/72] cpufreq: boost: Remove CONFIG_CPU_FREQ_BOOST_SW Kconfig option Commit 2fb4719b2560 ("cpufreq / boost: Kconfig: Support for software-managed BOOST") added the CONFIG_CPU_FREQ_BOOST_SW config. However EXYNOS based cpufreq drivers have been removed because of switching to cpufreq-dt driver which will set boost-attr if required. So, let's remove this option and update cpufreq_generic_attr[]. Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 4 ---- drivers/cpufreq/freq_table.c | 3 --- 2 files changed, 7 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index b22e6bba71f1..4d2b33a30292 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -26,10 +26,6 @@ config CPU_FREQ_GOV_COMMON select IRQ_WORK bool -config CPU_FREQ_BOOST_SW - bool - depends on THERMAL - config CPU_FREQ_STAT bool "CPU frequency transition statistics" help diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 3a8cc99e6815..e7be0af3199f 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -290,9 +290,6 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_boost_freqs); struct freq_attr *cpufreq_generic_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, -#ifdef CONFIG_CPU_FREQ_BOOST_SW - &cpufreq_freq_attr_scaling_boost_freqs, -#endif NULL, }; EXPORT_SYMBOL_GPL(cpufreq_generic_attr); From f2a424f6c613a98560dc49fd9984589401d51648 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Mar 2019 00:59:25 -0400 Subject: [PATCH 33/72] PM / core: Introduce dpm_async_fn() helper When we want to execute device 
pm functions asynchronously, we'll do the following for the device: 1) reinit_completion(&dev->power.completion); 2) Check if the device enables asynchronous suspend. 3) If necessary, execute the corresponding function asynchronously. There are a lot of such repeated operations here, in fact we can avoid this. So introduce dpm_async_fn() to have better code readability and reuse. And use this function to do some cleanup. Signed-off-by: Yangtao Li Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 62 +++++++++++++++------------------------ 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 41eba82ee7b9..9b8c829798f1 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -706,6 +706,19 @@ static bool is_async(struct device *dev) && !pm_trace_is_enabled(); } +static bool dpm_async_fn(struct device *dev, async_func_t func) +{ + reinit_completion(&dev->power.completion); + + if (is_async(dev)) { + get_device(dev); + async_schedule(func, dev); + return true; + } + + return false; +} + static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = (struct device *)data; @@ -732,13 +745,8 @@ void dpm_noirq_resume_devices(pm_message_t state) * in case the starting of async threads is * delayed by non-async resuming devices. */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume_noirq, dev); - } - } + list_for_each_entry(dev, &dpm_noirq_list, power.entry) + dpm_async_fn(dev, async_resume_noirq); while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); @@ -889,13 +897,8 @@ void dpm_resume_early(pm_message_t state) * in case the starting of async threads is * delayed by non-async resuming devices. 
*/ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume_early, dev); - } - } + list_for_each_entry(dev, &dpm_late_early_list, power.entry) + dpm_async_fn(dev, async_resume_early); while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); @@ -1053,13 +1056,8 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume, dev); - } - } + list_for_each_entry(dev, &dpm_suspended_list, power.entry) + dpm_async_fn(dev, async_resume); while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); @@ -1373,13 +1371,9 @@ static void async_suspend_noirq(void *data, async_cookie_t cookie) static int device_suspend_noirq(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend_noirq, dev); + if (dpm_async_fn(dev, async_suspend_noirq)) return 0; - } + return __device_suspend_noirq(dev, pm_transition, false); } @@ -1576,13 +1570,8 @@ static void async_suspend_late(void *data, async_cookie_t cookie) static int device_suspend_late(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend_late, dev); + if (dpm_async_fn(dev, async_suspend_late)) return 0; - } return __device_suspend_late(dev, pm_transition, false); } @@ -1842,13 +1831,8 @@ static void async_suspend(void *data, async_cookie_t cookie) static int device_suspend(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend, dev); + if (dpm_async_fn(dev, async_suspend)) return 0; - } 
return __device_suspend(dev, pm_transition, false); } From e94999688e3aa3c0a8ad5a60352cdc3ca3030434 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 11 Apr 2019 20:17:33 +0200 Subject: [PATCH 34/72] PM / Domains: Add genpd governor for CPUs After some preceding changes, PM domains managed by genpd may contain CPU devices, so idle state residency values should be taken into account during the state selection process. [The residency value is the minimum amount of time to be spent by a CPU (or a group of CPUs) in an idle state in order to save more energy than could be saved by picking up a shallower idle state.] For this purpose, add a new genpd governor, pm_domain_cpu_gov, to be used for selecting idle states of PM domains with CPU devices attached either directly or through subdomains. The new governor computes the minimum expected idle duration for all online CPUs attached to a PM domain and its subdomains. Next, it finds the deepest idle state whose target residency is within the expected idle duration and selects it as the target idle state of the domain. It should be noted that the minimum expected idle duration computation is based on the closest timer event information stored in the per-CPU variables cpuidle_devices for all of the CPUs in the domain. That needs to be revisited in future, as obviously there are other reasons why a CPU may be woken up from idle. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain_governor.c | 67 +++++++++++++++++++++++++++- include/linux/pm_domain.h | 4 ++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 4d07e38a8247..7912bc957244 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include static int dev_update_qos_constraint(struct device *dev, void *data) { @@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; - if (!genpd->max_off_time_changed) + if (!genpd->max_off_time_changed) { + genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; + } /* * We have to invalidate the cached results for the masters, so @@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->state_idx--; } + genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } @@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) return false; } +#ifdef CONFIG_CPU_IDLE +static bool cpu_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct cpuidle_device *dev; + ktime_t domain_wakeup, next_hrtimer; + s64 idle_duration_ns; + int cpu, i; + + /* Validate dev PM QoS constraints. */ + if (!default_power_down_ok(pd)) + return false; + + if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) + return true; + + /* + * Find the next wakeup for any of the online CPUs within the PM domain + * and its subdomains. Note, we only need the genpd->cpus, as it already + * contains a mask of all CPUs from subdomains. 
+ */ + domain_wakeup = ktime_set(KTIME_SEC_MAX, 0); + for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) { + dev = per_cpu(cpuidle_devices, cpu); + if (dev) { + next_hrtimer = READ_ONCE(dev->next_hrtimer); + if (ktime_before(next_hrtimer, domain_wakeup)) + domain_wakeup = next_hrtimer; + } + } + + /* The minimum idle duration is from now - until the next wakeup. */ + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + if (idle_duration_ns <= 0) + return false; + + /* + * Find the deepest idle state that has its residency value satisfied + * and by also taking into account the power off latency for the state. + * Start at the state picked by the dev PM QoS constraint validation. + */ + i = genpd->state_idx; + do { + if (idle_duration_ns >= (genpd->states[i].residency_ns + + genpd->states[i].power_off_latency_ns)) { + genpd->state_idx = i; + return true; + } + } while (--i >= 0); + + return false; +} + +struct dev_power_governor pm_domain_cpu_gov = { + .suspend_ok = default_suspend_ok, + .power_down_ok = cpu_power_down_ok, +}; +#endif + struct dev_power_governor simple_qos_governor = { .suspend_ok = default_suspend_ok, .power_down_ok = default_power_down_ok, diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a6e251fe9deb..bc82e74560ee 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -118,6 +118,7 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time. 
*/ bool max_off_time_changed; bool cached_power_down_ok; + bool cached_power_down_state_idx; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -202,6 +203,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; +#ifdef CONFIG_CPU_IDLE +extern struct dev_power_governor pm_domain_cpu_gov; +#endif #else static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) From e720a6c8fbdb86dcbb493d432e632dfafe6381cc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:21 +0200 Subject: [PATCH 35/72] drivers: firmware: psci: Move psci to separate directory Some following changes extend the PSCI driver with some additional files. To avoid further cluttering the toplevel firmware directory, first move the PSCI files into a PSCI sub-directory. Suggested-by: Mark Rutland Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. 
Wysocki --- drivers/firmware/Kconfig | 15 +-------------- drivers/firmware/Makefile | 3 +-- drivers/firmware/psci/Kconfig | 13 +++++++++++++ drivers/firmware/psci/Makefile | 4 ++++ drivers/firmware/{ => psci}/psci.c | 0 drivers/firmware/{ => psci}/psci_checker.c | 0 6 files changed, 19 insertions(+), 16 deletions(-) create mode 100644 drivers/firmware/psci/Kconfig create mode 100644 drivers/firmware/psci/Makefile rename drivers/firmware/{ => psci}/psci.c (100%) rename drivers/firmware/{ => psci}/psci_checker.c (100%) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index cac16c4b0df3..7b655f6156fb 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -5,20 +5,6 @@ menu "Firmware Drivers" -config ARM_PSCI_FW - bool - -config ARM_PSCI_CHECKER - bool "ARM PSCI checker" - depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST - help - Run the PSCI checker during startup. This checks that hotplug and - suspend operations work correctly when using PSCI. - - The torture tests may interfere with the PSCI checker by turning CPUs - on and off through hotplug, so for now torture tests and PSCI checker - are mutually exclusive. - config ARM_SCMI_PROTOCOL bool "ARM System Control and Management Interface (SCMI) Message Protocol" depends on ARM || ARM64 || COMPILE_TEST @@ -270,6 +256,7 @@ config TI_SCI_PROTOCOL config HAVE_ARM_SMCCC bool +source "drivers/firmware/psci/Kconfig" source "drivers/firmware/broadcom/Kconfig" source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 80feb635120f..9a3909a22682 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -2,8 +2,6 @@ # # Makefile for the linux kernel. 
# -obj-$(CONFIG_ARM_PSCI_FW) += psci.o -obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o obj-$(CONFIG_ARM_SCPI_POWER_DOMAIN) += scpi_pm_domain.o obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o @@ -25,6 +23,7 @@ CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQU obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o obj-$(CONFIG_ARM_SCMI_PROTOCOL) += arm_scmi/ +obj-y += psci/ obj-y += broadcom/ obj-y += meson/ obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ diff --git a/drivers/firmware/psci/Kconfig b/drivers/firmware/psci/Kconfig new file mode 100644 index 000000000000..26a3b32bf7ab --- /dev/null +++ b/drivers/firmware/psci/Kconfig @@ -0,0 +1,13 @@ +config ARM_PSCI_FW + bool + +config ARM_PSCI_CHECKER + bool "ARM PSCI checker" + depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST + help + Run the PSCI checker during startup. This checks that hotplug and + suspend operations work correctly when using PSCI. + + The torture tests may interfere with the PSCI checker by turning CPUs + on and off through hotplug, so for now torture tests and PSCI checker + are mutually exclusive. 
diff --git a/drivers/firmware/psci/Makefile b/drivers/firmware/psci/Makefile new file mode 100644 index 000000000000..1956b882470f --- /dev/null +++ b/drivers/firmware/psci/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +# +obj-$(CONFIG_ARM_PSCI_FW) += psci.o +obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci/psci.c similarity index 100% rename from drivers/firmware/psci.c rename to drivers/firmware/psci/psci.c diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci/psci_checker.c similarity index 100% rename from drivers/firmware/psci_checker.c rename to drivers/firmware/psci/psci_checker.c From f0f6ad9092601d95729ce14ec6da3b5055e18714 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:22 +0200 Subject: [PATCH 36/72] MAINTAINERS: Update files for PSCI The files for the PSCI firmware driver were moved to a sub-directory, so update MAINTAINERS to reflect that. Suggested-by: Mark Rutland Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41..4c7136581e52 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12415,7 +12415,7 @@ M: Mark Rutland M: Lorenzo Pieralisi L: linux-arm-kernel@lists.infradead.org S: Maintained -F: drivers/firmware/psci*.c +F: drivers/firmware/psci/ F: include/linux/psci.h F: include/uapi/linux/psci.h From 0865d20c50741e1d37d4a1108764e5e45a83973e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:23 +0200 Subject: [PATCH 37/72] drivers: firmware: psci: Split psci_dt_cpu_init_idle() Split the psci_dt_cpu_init_idle() function into two functions. This makes the code clearer and provides better re-usability. 
Co-developed-by: Lina Iyer Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci/psci.c | 44 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index c80ec1d03274..9788bfc1cf8b 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -270,9 +270,26 @@ static int __init psci_features(u32 psci_func_id) #ifdef CONFIG_CPU_IDLE static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); +static int psci_dt_parse_state_node(struct device_node *np, u32 *state) +{ + int err = of_property_read_u32(np, "arm,psci-suspend-param", state); + + if (err) { + pr_warn("%pOF missing arm,psci-suspend-param property\n", np); + return err; + } + + if (!psci_power_state_is_valid(*state)) { + pr_warn("Invalid PSCI power state %#x\n", *state); + return -EINVAL; + } + + return 0; +} + static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) { - int i, ret, count = 0; + int i, ret = 0, count = 0; u32 *psci_states; struct device_node *state_node; @@ -291,29 +308,16 @@ static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) return -ENOMEM; for (i = 0; i < count; i++) { - u32 state; - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &state); - if (ret) { - pr_warn(" * %pOF missing arm,psci-suspend-param property\n", - state_node); - of_node_put(state_node); - goto free_mem; - } - + ret = psci_dt_parse_state_node(state_node, &psci_states[i]); of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", state, i); - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - ret = -EINVAL; + + if (ret) goto free_mem; - } - psci_states[i] = state; + + pr_debug("psci-power-state %#x index %d\n", 
psci_states[i], i); } + /* Idle states parsed correctly, initialize per-cpu pointer */ per_cpu(psci_power_state, cpu) = psci_states; return 0; From d036b5cfef6360808117abd0c53775b7a8981a2e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:24 +0200 Subject: [PATCH 38/72] drivers: firmware: psci: Simplify error path of psci_dt_init() Instead of having each PSCI init function taking care of the of_node_put(), deal with that from psci_dt_init(), as this enables a bit simpler error path for each PSCI init function. Co-developed-by: Lina Iyer Signed-off-by: Ulf Hansson Acked-by: Mark Rutland Reviewed-by: Daniel Lezcano Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci/psci.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 9788bfc1cf8b..e480e0af632c 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -609,9 +609,9 @@ static int __init psci_0_2_init(struct device_node *np) int err; err = get_set_conduit_method(np); - if (err) - goto out_put_node; + return err; + /* * Starting with v0.2, the PSCI specification introduced a call * (PSCI_VERSION) that allows probing the firmware version, so @@ -619,11 +619,7 @@ static int __init psci_0_2_init(struct device_node *np) * can be carried out according to the specific version reported * by firmware */ - err = psci_probe(); - -out_put_node: - of_node_put(np); - return err; + return psci_probe(); } /* @@ -635,9 +631,8 @@ static int __init psci_0_1_init(struct device_node *np) int err; err = get_set_conduit_method(np); - if (err) - goto out_put_node; + return err; pr_info("Using PSCI v0.1 Function IDs from DT\n"); @@ -661,9 +656,7 @@ static int __init psci_0_1_init(struct device_node *np) psci_ops.migrate = psci_migrate; } -out_put_node: - of_node_put(np); - return err; + return 0; } static const struct of_device_id psci_of_match[] 
__initconst = { @@ -678,6 +671,7 @@ int __init psci_dt_init(void) struct device_node *np; const struct of_device_id *matched_np; psci_initcall_t init_fn; + int ret; np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); @@ -685,7 +679,10 @@ int __init psci_dt_init(void) return -ENODEV; init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); + ret = init_fn(np); + + of_node_put(np); + return ret; } #ifdef CONFIG_ACPI From 60dd1ead65e83268af9ccd19b97c7011cb50186b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:25 +0200 Subject: [PATCH 39/72] drivers: firmware: psci: Announce support for OS initiated suspend mode PSCI firmware v1.0+ supports two different modes for CPU_SUSPEND: the Platform Coordinated mode, which is the default and mandatory mode, and the OS initiated (OSI) mode, for which support is optional. In some cases it's interesting for the user/developer to know if the OSI mode is supported by the PSCI FW, so print a message to the log if that is the case. Co-developed-by: Lina Iyer Signed-off-by: Ulf Hansson Reviewed-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci/psci.c | 21 ++++++++++++++++++++- include/uapi/linux/psci.h | 5 +++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e480e0af632c..eabd01383cd6 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -95,6 +95,11 @@ static inline bool psci_has_ext_power_state(void) PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK; } +static inline bool psci_has_osi_support(void) +{ + return psci_cpu_suspend_feature & PSCI_1_0_OS_INITIATED; +} + static inline bool psci_power_state_loses_context(u32 state) { const u32 mask = psci_has_ext_power_state() ? 
@@ -659,10 +664,24 @@ static int __init psci_0_1_init(struct device_node *np) return 0; } +static int __init psci_1_0_init(struct device_node *np) +{ + int err; + + err = psci_0_2_init(np); + if (err) + return err; + + if (psci_has_osi_support()) + pr_info("OSI mode supported.\n"); + + return 0; +} + static const struct of_device_id psci_of_match[] __initconst = { { .compatible = "arm,psci", .data = psci_0_1_init}, { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - { .compatible = "arm,psci-1.0", .data = psci_0_2_init}, + { .compatible = "arm,psci-1.0", .data = psci_1_0_init}, {}, }; diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index b3bcabe380da..581f72085c33 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -49,6 +49,7 @@ #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) +#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) @@ -97,6 +98,10 @@ #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK \ (0x1 << PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT) +#define PSCI_1_0_OS_INITIATED BIT(0) +#define PSCI_1_0_SUSPEND_MODE_PC 0 +#define PSCI_1_0_SUSPEND_MODE_OSI 1 + /* PSCI return values (inclusive of all PSCI versions) */ #define PSCI_RET_SUCCESS 0 #define PSCI_RET_NOT_SUPPORTED -1 From dc351d4c5f4fe4d0f274d6d660227be0c3a03317 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 11:55:16 +0200 Subject: [PATCH 40/72] PM / core: Propagate dev->power.wakeup_path when no callbacks The dev->power.direct_complete flag may become set in device_prepare() in case the device don't have any PM callbacks (dev->power.no_pm_callbacks is set). This leads to a broken behaviour, when there is child having wakeup enabled and relies on its parent to be used in the wakeup path. 
More precisely, when the direct complete path becomes selected for the child in __device_suspend(), the propagation of the dev->power.wakeup_path becomes skipped as well. Let's address this problem by checking if the device is a part of the wakeup path or has wakeup enabled and, if so, preventing the direct complete path from being used. Reported-by: Loic Pallardy Signed-off-by: Ulf Hansson [ rjw: Comment cleanup ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 9b8c829798f1..43e863cc0c1b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1736,6 +1736,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + /* Avoid direct_complete to let wakeup_path propagate. */ + if (device_may_wakeup(dev) || dev->power.wakeup_path) + dev->power.direct_complete = false; + if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); From 0d00a239f70fa52eb23837ffacf990f8404ecace Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 5 Dec 2018 13:50:22 -0600 Subject: [PATCH 41/72] PM / devfreq: Use of_node_name_eq for node name comparisons Convert string compares of DT node names to use of_node_name_eq helper instead. This removes direct access to the node name pointer. For instances using of_node_cmp, this has the side effect of now using case sensitive comparisons. This should not matter for any FDT based system which all of these are.
Cc: Chanwoo Choi Cc: MyungJoo Ham Cc: Kyungmin Park Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: linux-pm@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Signed-off-by: Rob Herring Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq-event.c | 2 +- drivers/devfreq/event/exynos-ppmu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c index d67242d87744..87e93406d7cd 100644 --- a/drivers/devfreq/devfreq-event.c +++ b/drivers/devfreq/devfreq-event.c @@ -240,7 +240,7 @@ struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(struct device *dev, } list_for_each_entry(edev, &devfreq_event_list, node) { - if (!strcmp(edev->desc->name, node->name)) + if (of_node_name_eq(node, edev->desc->name)) goto out; } edev = NULL; diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index c61de0bdf053..c2ea94957501 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -529,7 +529,7 @@ static int of_get_devfreq_events(struct device_node *np, if (!ppmu_events[i].name) continue; - if (!of_node_cmp(node->name, ppmu_events[i].name)) + if (of_node_name_eq(node, ppmu_events[i].name)) break; } From a9487917ba6728dd618ae6d418a7f2197f1b4592 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 19 Jan 2019 11:04:54 -0500 Subject: [PATCH 42/72] PM / devfreq: fix mem leak in devfreq_add_device() 'devfreq' is malloced in devfreq_add_device() and should be freed in the error handling cases, otherwise it will cause memory leak. 
Signed-off-by: Yangtao Li Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 0ae3de76833b..fa1bdde89ffc 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -651,7 +651,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_unlock(&devfreq->lock); err = set_freq_table(devfreq); if (err < 0) - goto err_out; + goto err_dev; mutex_lock(&devfreq->lock); } From 25846fa1cedada274b65ffd2413378290a60be47 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 19 Jan 2019 11:04:53 -0500 Subject: [PATCH 43/72] PM / devfreq: fix missing check of return value in devfreq_add_device() devm_kzalloc() could fail, so insert a check of its return value. And if it fails, returns -ENOMEM. Signed-off-by: Yangtao Li Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index fa1bdde89ffc..4af608a61cd9 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -689,10 +689,22 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->profile->max_state, devfreq->profile->max_state), GFP_KERNEL); + if (!devfreq->trans_table) { + mutex_unlock(&devfreq->lock); + err = -ENOMEM; + goto err_devfreq; + } + devfreq->time_in_state = devm_kcalloc(&devfreq->dev, devfreq->profile->max_state, sizeof(unsigned long), GFP_KERNEL); + if (!devfreq->time_in_state) { + mutex_unlock(&devfreq->lock); + err = -ENOMEM; + goto err_devfreq; + } + devfreq->last_stat_updated = jiffies; srcu_init_notifier_head(&devfreq->transition_notifier_list); @@ -726,7 +738,7 @@ struct devfreq *devfreq_add_device(struct device *dev, err_init: mutex_unlock(&devfreq_list_lock); - +err_devfreq: devfreq_remove_device(devfreq); devfreq = NULL; err_dev: From 6d690f77932fe1f3ce5eb2de2c5ac16d33197608 Mon Sep 17 
00:00:00 2001 From: MyungJoo Ham Date: Mon, 21 Jan 2019 11:11:07 +0900 Subject: [PATCH 44/72] PM / devfreq: consistent indentation Following up with complaints on inconsistent indentation from Yangtao Li, this fixes indentation inconsistency. In principle, this tries to put arguments aligned to the left including the first argument except for the case where the first argument is on the far-right side. Signed-off-by: MyungJoo Ham Reviewed-by: Chanwoo Choi Acked-by: Yangtao Li --- drivers/devfreq/devfreq.c | 49 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 4af608a61cd9..428a1de81008 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -528,7 +528,7 @@ void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay) mutex_lock(&devfreq->lock); if (!devfreq->stop_polling) queue_delayed_work(devfreq_wq, &devfreq->work, - msecs_to_jiffies(devfreq->profile->polling_ms)); + msecs_to_jiffies(devfreq->profile->polling_ms)); } out: mutex_unlock(&devfreq->lock); @@ -537,7 +537,7 @@ EXPORT_SYMBOL(devfreq_interval_update); /** * devfreq_notifier_call() - Notify that the device frequency requirements - * has been changed out of devfreq framework. + * has been changed out of devfreq framework. 
* @nb: the notifier_block (supposed to be devfreq->nb) * @type: not used * @devp: not used @@ -683,12 +683,11 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_out; } - devfreq->trans_table = - devm_kzalloc(&devfreq->dev, - array3_size(sizeof(unsigned int), - devfreq->profile->max_state, - devfreq->profile->max_state), - GFP_KERNEL); + devfreq->trans_table = devm_kzalloc(&devfreq->dev, + array3_size(sizeof(unsigned int), + devfreq->profile->max_state, + devfreq->profile->max_state), + GFP_KERNEL); if (!devfreq->trans_table) { mutex_unlock(&devfreq->lock); err = -ENOMEM; @@ -696,9 +695,9 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->time_in_state = devm_kcalloc(&devfreq->dev, - devfreq->profile->max_state, - sizeof(unsigned long), - GFP_KERNEL); + devfreq->profile->max_state, + sizeof(unsigned long), + GFP_KERNEL); if (!devfreq->time_in_state) { mutex_unlock(&devfreq->lock); err = -ENOMEM; @@ -1184,7 +1183,7 @@ static ssize_t available_governors_show(struct device *d, */ if (df->governor->immutable) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, - "%s ", df->governor_name); + "%s ", df->governor_name); /* * The devfreq device shows the registered governor except for * immutable governors such as passive governor . @@ -1497,8 +1496,8 @@ EXPORT_SYMBOL(devfreq_recommended_opp); /** * devfreq_register_opp_notifier() - Helper function to get devfreq notified - * for any changes in the OPP availability - * changes + * for any changes in the OPP availability + * changes * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1510,8 +1509,8 @@ EXPORT_SYMBOL(devfreq_register_opp_notifier); /** * devfreq_unregister_opp_notifier() - Helper function to stop getting devfreq - * notified for any changes in the OPP - * availability changes anymore. + * notified for any changes in the OPP + * availability changes anymore. * @dev: The devfreq user device. 
(parent of devfreq) * @devfreq: The devfreq object. * @@ -1530,8 +1529,8 @@ static void devm_devfreq_opp_release(struct device *dev, void *res) } /** - * devm_ devfreq_register_opp_notifier() - * - Resource-managed devfreq_register_opp_notifier() + * devm_devfreq_register_opp_notifier() - Resource-managed + * devfreq_register_opp_notifier() * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1559,8 +1558,8 @@ int devm_devfreq_register_opp_notifier(struct device *dev, EXPORT_SYMBOL(devm_devfreq_register_opp_notifier); /** - * devm_devfreq_unregister_opp_notifier() - * - Resource-managed devfreq_unregister_opp_notifier() + * devm_devfreq_unregister_opp_notifier() - Resource-managed + * devfreq_unregister_opp_notifier() * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1579,8 +1578,8 @@ EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier); * @list: DEVFREQ_TRANSITION_NOTIFIER. */ int devfreq_register_notifier(struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct notifier_block *nb, + unsigned int list) { int ret = 0; @@ -1686,9 +1685,9 @@ EXPORT_SYMBOL(devm_devfreq_register_notifier); * @list: DEVFREQ_TRANSITION_NOTIFIER. */ void devm_devfreq_unregister_notifier(struct device *dev, - struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) { WARN_ON(devres_release(dev, devm_devfreq_notifier_release, devm_devfreq_dev_match, devfreq)); From e2794d74f1ec6d31e662b147d55041bd00277278 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Feb 2019 10:18:24 -0500 Subject: [PATCH 45/72] PM / devfreq: rk3399_dmc: remove unneeded semicolon The semicolon is unneeded, so remove it. 
Signed-off-by: Yangtao Li Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/rk3399_dmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index e795ad2b3f6b..a228dad2bee4 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -322,7 +322,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) dev_err(dev, "Cannot get the clk dmc_clk\n"); return PTR_ERR(data->dmc_clk); - }; + } data->edev = devfreq_event_get_edev_by_phandle(dev, 0); if (IS_ERR(data->edev)) From 726409698feeac20de12fd107159573539b3e063 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Feb 2019 10:18:25 -0500 Subject: [PATCH 46/72] PM / devfreq: rockchip-dfi: remove unneeded semicolon The semicolon is unneeded, so remove it. Signed-off-by: Yangtao Li Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/rockchip-dfi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 22b113363ffc..fcbf76ebf55d 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -211,7 +211,7 @@ static int rockchip_dfi_probe(struct platform_device *pdev) if (IS_ERR(data->clk)) { dev_err(dev, "Cannot get the clk dmc_clk\n"); return PTR_ERR(data->clk); - }; + } /* try to find the optional reference to the pmu syscon */ node = of_parse_phandle(np, "rockchip,pmu", 0); From 1d1397c3ec1fd0bae330c376269a1c8c7e981a35 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Feb 2019 10:18:26 -0500 Subject: [PATCH 47/72] PM / devfreq: tegra: remove unneeded variable This variable is not used after initialization, so remove it. And in order to unify the code style, move the location where the dev_get_drvdata is called by the way. 
Signed-off-by: Yangtao Li Reviewed-by: Chanwoo Choi Acked-by: Jon Hunter Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index c59d2eee5d30..c89ba7b834ff 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -573,10 +573,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq, static int tegra_governor_event_handler(struct devfreq *devfreq, unsigned int event, void *data) { - struct tegra_devfreq *tegra; - int ret = 0; - - tegra = dev_get_drvdata(devfreq->dev.parent); + struct tegra_devfreq *tegra = dev_get_drvdata(devfreq->dev.parent); switch (event) { case DEVFREQ_GOV_START: @@ -600,7 +597,7 @@ static int tegra_governor_event_handler(struct devfreq *devfreq, break; } - return ret; + return 0; } static struct devfreq_governor tegra_devfreq_governor = { From bc658bef97a70094d4347faab7cabf2f5267d03f Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 11 Mar 2019 15:36:30 +0530 Subject: [PATCH 48/72] PM / devfreq: Restart previous governor if new governor fails to start If the new governor fails to start, switch back to old governor so that the devfreq state is not left in some weird limbo. 
[Myungjoo: assume fatal on revert failure and set df->governor to NULL] Signed-off-by: Sibi Sankar Signed-off-by: Saravana Kannan Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 428a1de81008..08dfcdc2ac58 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1124,7 +1124,7 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, struct devfreq *df = to_devfreq(dev); int ret; char str_governor[DEVFREQ_NAME_LEN + 1]; - struct devfreq_governor *governor; + const struct devfreq_governor *governor, *prev_governor; ret = sscanf(buf, "%" __stringify(DEVFREQ_NAME_LEN) "s", str_governor); if (ret != 1) @@ -1153,12 +1153,24 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, goto out; } } + prev_governor = df->governor; df->governor = governor; strncpy(df->governor_name, governor->name, DEVFREQ_NAME_LEN); ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); - if (ret) + if (ret) { dev_warn(dev, "%s: Governor %s not started(%d)\n", __func__, df->governor->name, ret); + df->governor = prev_governor; + strncpy(df->governor_name, prev_governor->name, + DEVFREQ_NAME_LEN); + ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); + if (ret) { + dev_err(dev, + "%s: reverting to Governor %s failed (%d)\n", + __func__, df->governor_name, ret); + df->governor = NULL; + } + } out: mutex_unlock(&devfreq_list_lock); From b53b0128052ffd687797d5f4deeb76327e7b5711 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 13 Mar 2019 13:22:53 +0100 Subject: [PATCH 49/72] PM / devfreq: Fix static checker warning in try_then_request_governor The patch 23c7b54ca1cd: "PM / devfreq: Fix devfreq_add_device() when drivers are built as modules." 
leads to the following static checker warning: drivers/devfreq/devfreq.c:1043 governor_store() warn: 'governor' can also be NULL The reason is that the try_then_request_governor() function returns both error pointers and NULL. It should just return error pointers, so fix this by returning an ERR_PTR for the error instead of returning NULL. Fixes: 23c7b54ca1cd ("PM / devfreq: Fix devfreq_add_device() when drivers are built as modules.") Reported-by: Dan Carpenter Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 08dfcdc2ac58..8928383a1fa1 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -228,7 +228,7 @@ static struct devfreq_governor *find_devfreq_governor(const char *name) * if is not found. This can happen when both drivers (the governor driver * and the driver that call devfreq_add_device) are built as modules. * devfreq_list_lock should be held by the caller. Returns the matched - * governor's pointer. + * governor's pointer or an error pointer. */ static struct devfreq_governor *try_then_request_governor(const char *name) { @@ -254,7 +254,7 @@ static struct devfreq_governor *try_then_request_governor(const char *name) /* Restore previous state before return */ mutex_lock(&devfreq_list_lock); if (err) - return NULL; + return ERR_PTR(err); governor = find_devfreq_governor(name); } From fbb9c3c9a5acfdbcc21114de0ba16c7b488968f6 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 21 Mar 2019 10:01:10 +0100 Subject: [PATCH 50/72] PM / devfreq: exynos-bus: Suspend all devices on system shutdown Force all Exynos buses to safe operation points before doing the system reboot operation.
There are boards on which some aggressive power saving operation points are beyond the capabilities of the bootloader to properly reset the hardware and boot the board. This way one can avoid a board crash early after reboot. This fixes a reboot issue on the OdroidU3 board with both eMMC and SD boot. Reported-by: Markus Reichl Signed-off-by: Marek Szyprowski Acked-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/exynos-bus.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index c25658b26598..486cc5b422f1 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -514,6 +514,13 @@ err: return ret; } +static void exynos_bus_shutdown(struct platform_device *pdev) +{ + struct exynos_bus *bus = dev_get_drvdata(&pdev->dev); + + devfreq_suspend_device(bus->devfreq); +} + #ifdef CONFIG_PM_SLEEP static int exynos_bus_resume(struct device *dev) { @@ -556,6 +563,7 @@ MODULE_DEVICE_TABLE(of, exynos_bus_of_match); static struct platform_driver exynos_bus_platdrv = { .probe = exynos_bus_probe, + .shutdown = exynos_bus_shutdown, .driver = { .name = "exynos-bus", .pm = &exynos_bus_pm, From adfe3b76608ffe547af5a74415f15499b798f32a Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Thu, 21 Mar 2019 19:14:36 -0400 Subject: [PATCH 51/72] PM / devfreq: rockchip-dfi: Move GRF definitions to a common place. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some rk3399 GRF (Generic Register Files) definitions can be used for different drivers. Move these definitions to a common include so we don't need to duplicate these definitions.
Signed-off-by: Enric Balletbo i Serra Acked-by: Chanwoo Choi Signed-off-by: Gaël PORTAY Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/rockchip-dfi.c | 23 +++++++---------------- include/soc/rockchip/rk3399_grf.h | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 include/soc/rockchip/rk3399_grf.h diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index fcbf76ebf55d..a436ec4901bb 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -26,6 +26,8 @@ #include #include +#include + #define RK3399_DMC_NUM_CH 2 /* DDRMON_CTRL */ @@ -43,18 +45,6 @@ #define DDRMON_CH1_COUNT_NUM 0x3c #define DDRMON_CH1_DFI_ACCESS_NUM 0x40 -/* pmu grf */ -#define PMUGRF_OS_REG2 0x308 -#define DDRTYPE_SHIFT 13 -#define DDRTYPE_MASK 7 - -enum { - DDR3 = 3, - LPDDR3 = 6, - LPDDR4 = 7, - UNUSED = 0xFF -}; - struct dmc_usage { u32 access; u32 total; @@ -83,16 +73,17 @@ static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev) u32 ddr_type; /* get ddr type */ - regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val); - ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK; + regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; /* clear DDRMON_CTRL setting */ writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL); /* set ddr type to dfi */ - if (ddr_type == LPDDR3) + if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3) writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL); - else if (ddr_type == LPDDR4) + else if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR4) writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL); /* enable count, use software mode */ diff --git a/include/soc/rockchip/rk3399_grf.h b/include/soc/rockchip/rk3399_grf.h new file mode 100644 index 000000000000..3eebabcb2812 --- /dev/null +++ b/include/soc/rockchip/rk3399_grf.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: 
GPL-2.0+ */ +/* + * Rockchip General Register Files definitions + * + * Copyright (c) 2018, Collabora Ltd. + * Author: Enric Balletbo i Serra + */ + +#ifndef __SOC_RK3399_GRF_H +#define __SOC_RK3399_GRF_H + +/* PMU GRF Registers */ +#define RK3399_PMUGRF_OS_REG2 0x308 +#define RK3399_PMUGRF_DDRTYPE_SHIFT 13 +#define RK3399_PMUGRF_DDRTYPE_MASK 7 +#define RK3399_PMUGRF_DDRTYPE_DDR3 3 +#define RK3399_PMUGRF_DDRTYPE_LPDDR2 5 +#define RK3399_PMUGRF_DDRTYPE_LPDDR3 6 +#define RK3399_PMUGRF_DDRTYPE_LPDDR4 7 + +#endif From 9173c5ceb035aab28171cd74dfddf27f47213b99 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Thu, 21 Mar 2019 19:14:38 -0400 Subject: [PATCH 52/72] PM / devfreq: rk3399_dmc: Pass ODT and auto power down parameters to TF-A. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trusted Firmware-A (TF-A) for rk3399 implements a SiP call to get the on-die termination (ODT) and auto power down parameters from kernel, this patch adds the functionality to do this. Also, if DDR clock frequency is lower than the on-die termination (ODT) disable frequency this driver should disable the DDR ODT. 
Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chanwoo Choi Signed-off-by: Gaël PORTAY Signed-off-by: MyungJoo Ham --- drivers/devfreq/rk3399_dmc.c | 71 ++++++++++++++++++++++++++++- include/soc/rockchip/rockchip_sip.h | 1 + 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index a228dad2bee4..567c034d0301 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -18,14 +18,17 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include +#include #include struct dram_timing { @@ -69,8 +72,11 @@ struct rk3399_dmcfreq { struct mutex lock; struct dram_timing timing; struct regulator *vdd_center; + struct regmap *regmap_pmu; unsigned long rate, target_rate; unsigned long volt, target_volt; + unsigned int odt_dis_freq; + int odt_pd_arg0, odt_pd_arg1; }; static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, @@ -80,6 +86,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, struct dev_pm_opp *opp; unsigned long old_clk_rate = dmcfreq->rate; unsigned long target_volt, target_rate; + struct arm_smccc_res res; + bool odt_enable = false; int err; opp = devfreq_recommended_opp(dev, freq, flags); @@ -95,6 +103,19 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, mutex_lock(&dmcfreq->lock); + if (target_rate >= dmcfreq->odt_dis_freq) + odt_enable = true; + + /* + * This makes a SMC call to the TF-A to set the DDR PD (power-down) + * timings and to enable or disable the ODT (on-die termination) + * resistors. + */ + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0, + dmcfreq->odt_pd_arg1, + ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, + odt_enable, 0, 0, 0, &res); + /* * If frequency scaling from low to high, adjust voltage first. * If frequency scaling from high to low, adjust frequency first. 
@@ -294,11 +315,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) { struct arm_smccc_res res; struct device *dev = &pdev->dev; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node, *node; struct rk3399_dmcfreq *data; int ret, index, size; uint32_t *timing; struct dev_pm_opp *opp; + u32 ddr_type; + u32 val; data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL); if (!data) @@ -354,10 +377,56 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) } } + node = of_parse_phandle(np, "rockchip,pmu", 0); + if (node) { + data->regmap_pmu = syscon_node_to_regmap(node); + if (IS_ERR(data->regmap_pmu)) + return PTR_ERR(data->regmap_pmu); + } + + regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; + + switch (ddr_type) { + case RK3399_PMUGRF_DDRTYPE_DDR3: + data->odt_dis_freq = data->timing.ddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR3: + data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR4: + data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq; + break; + default: + return -EINVAL; + }; + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT, 0, 0, 0, 0, &res); + /* + * In TF-A there is a platform SIP call to set the PD (power-down) + * timings and to enable or disable the ODT (on-die termination). 
+ * This call needs three arguments as follows: + * + * arg0: + * bit[0-7] : sr_idle + * bit[8-15] : sr_mc_gate_idle + * bit[16-31] : standby idle + * arg1: + * bit[0-11] : pd_idle + * bit[16-27] : srpd_lite_idle + * arg2: + * bit[0] : odt enable + */ + data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) | + ((data->timing.sr_mc_gate_idle & 0xff) << 8) | + ((data->timing.standby_idle & 0xffff) << 16); + data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) | + ((data->timing.srpd_lite_idle & 0xfff) << 16); + /* * We add a devfreq driver to our parent since it has a device tree node * with operating points. diff --git a/include/soc/rockchip/rockchip_sip.h b/include/soc/rockchip/rockchip_sip.h index 7e28092c4d3d..ad9482c56797 100644 --- a/include/soc/rockchip/rockchip_sip.h +++ b/include/soc/rockchip/rockchip_sip.h @@ -23,5 +23,6 @@ #define ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE 0x05 #define ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ 0x06 #define ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM 0x07 +#define ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD 0x08 #endif From 1be0730f1dcd2971db4d2fe5497a20f438b837a7 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 18 Feb 2019 19:21:08 +0100 Subject: [PATCH 53/72] trace: events: add devfreq trace event file The patch adds a new file with trace events for the devfreq framework. They are used for performance analysis of the framework. It also contains updates in the MAINTAINERS file, adding a new entry for devfreq maintainers.
Signed-off-by: Lukasz Luba Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- MAINTAINERS | 1 + include/trace/events/devfreq.h | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 include/trace/events/devfreq.h diff --git a/MAINTAINERS b/MAINTAINERS index 3671fdea5010..27ed10966c81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4552,6 +4552,7 @@ S: Maintained F: drivers/devfreq/ F: include/linux/devfreq.h F: Documentation/devicetree/bindings/devfreq/ +F: include/trace/events/devfreq.h DEVICE FREQUENCY EVENT (DEVFREQ-EVENT) M: Chanwoo Choi diff --git a/include/trace/events/devfreq.h b/include/trace/events/devfreq.h new file mode 100644 index 000000000000..cf5b8772175d --- /dev/null +++ b/include/trace/events/devfreq.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM devfreq + +#if !defined(_TRACE_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DEVFREQ_H + +#include +#include + +TRACE_EVENT(devfreq_monitor, + TP_PROTO(struct devfreq *devfreq), + + TP_ARGS(devfreq), + + TP_STRUCT__entry( + __field(unsigned long, freq) + __field(unsigned long, busy_time) + __field(unsigned long, total_time) + __field(unsigned int, polling_ms) + __string(dev_name, dev_name(&devfreq->dev)) + ), + + TP_fast_assign( + __entry->freq = devfreq->previous_freq; + __entry->busy_time = devfreq->last_status.busy_time; + __entry->total_time = devfreq->last_status.total_time; + __entry->polling_ms = devfreq->profile->polling_ms; + __assign_str(dev_name, dev_name(&devfreq->dev)); + ), + + TP_printk("dev_name=%s freq=%lu polling_ms=%u load=%lu", + __get_str(dev_name), __entry->freq, __entry->polling_ms, + __entry->total_time == 0 ? 
0 : + (100 * __entry->busy_time) / __entry->total_time) +); +#endif /* _TRACE_DEVFREQ_H */ + +/* This part must be outside protection */ +#include From cf451adfa392bd9ba36f31659dbe6a5010b46ef9 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 18 Feb 2019 19:21:09 +0100 Subject: [PATCH 54/72] PM / devfreq: add tracing for scheduling work This patch adds basic tracing of the devfreq workqueue and delayed work. It aims to capture changes of the polling intervals and device state. Signed-off-by: Lukasz Luba Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 8928383a1fa1..6b6991f0e873 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -29,6 +29,9 @@ #include #include "governor.h" +#define CREATE_TRACE_POINTS +#include + static struct class *devfreq_class; /* @@ -394,6 +397,8 @@ static void devfreq_monitor(struct work_struct *work) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); mutex_unlock(&devfreq->lock); + + trace_devfreq_monitor(devfreq); } /** From 4302e381a870aafb547e6139830e5a4ee2cb8261 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 15 Apr 2019 12:47:46 +0100 Subject: [PATCH 55/72] firmware/psci: add support for SYSTEM_RESET2 PSCI v1.1 introduced SYSTEM_RESET2 to allow both architectural resets where the semantics are described by the PSCI specification itself as well as vendor-specific resets. Currently only system warm reset semantics is defined as part of architectural resets by the specification. This patch implements support for SYSTEM_RESET2 by making use of reboot_mode passed by the reboot infrastructure in the kernel. Acked-by: Mark Rutland Tested-by: Aaro Koskinen Signed-off-by: Sudeep Holla Signed-off-by: Rafael J.
Wysocki --- drivers/firmware/psci/psci.c | 24 +++++++++++++++++++++++- include/uapi/linux/psci.h | 2 ++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index eabd01383cd6..fe090ef43d28 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -88,6 +88,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; PSCI_1_0_EXT_POWER_STATE_TYPE_MASK) static u32 psci_cpu_suspend_feature; +static bool psci_system_reset2_supported; static inline bool psci_has_ext_power_state(void) { @@ -258,7 +259,17 @@ static int get_set_conduit_method(struct device_node *np) static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) { - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + if ((reboot_mode == REBOOT_WARM || reboot_mode == REBOOT_SOFT) && + psci_system_reset2_supported) { + /* + * reset_type[31] = 0 (architectural) + * reset_type[30:0] = 0 (SYSTEM_WARM_RESET) + * cookie = 0 (ignored by the implementation) + */ + invoke_psci_fn(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2), 0, 0, 0); + } else { + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + } } static void psci_sys_poweroff(void) @@ -460,6 +471,16 @@ static const struct platform_suspend_ops psci_suspend_ops = { .enter = psci_system_suspend_enter, }; +static void __init psci_init_system_reset2(void) +{ + int ret; + + ret = psci_features(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2)); + + if (ret != PSCI_RET_NOT_SUPPORTED) + psci_system_reset2_supported = true; +} + static void __init psci_init_system_suspend(void) { int ret; @@ -597,6 +618,7 @@ static int __init psci_probe(void) psci_init_smccc(); psci_init_cpu_suspend(); psci_init_system_suspend(); + psci_init_system_reset2(); } return 0; diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 581f72085c33..2fcad1dd0b0e 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -50,8 +50,10 @@ #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) #define 
PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) +#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) +#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff From eae2ef0ed2460c56915b13e7d67bb46ae379ec72 Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Mon, 15 Apr 2019 14:03:58 +0300 Subject: [PATCH 56/72] drivers/cpufreq/acpi-cpufreq.c: This fixes the following checkpatch warning WARNING: Prefer using '"%s...", __func__' to using function's name, in a string Switch hardcoded function name with a reference to __func__ making the code more maintainable Signed-off-by: Mohan Kumar Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index c72258a44ba4..73bb2aafb1a8 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -366,7 +366,7 @@ static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *dat val = drv_read(data, mask); - pr_debug("get_cur_val = %u\n", val); + pr_debug("%s = %u\n", __func__, val); return val; } @@ -378,7 +378,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) unsigned int freq; unsigned int cached_freq; - pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); + pr_debug("%s (%d)\n", __func__, cpu); policy = cpufreq_cpu_get_raw(cpu); if (unlikely(!policy)) @@ -458,8 +458,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (acpi_pstate_strict) { if (!check_freqs(policy, mask, policy->freq_table[index].frequency)) { - pr_debug("acpi_cpufreq_target failed (%d)\n", - policy->cpu); + pr_debug("%s (%d)\n", __func__, policy->cpu); result = -EAGAIN; } } @@ -573,7 +572,7 @@ static int 
cpufreq_boost_down_prep(unsigned int cpu) static int __init acpi_cpufreq_early_init(void) { unsigned int i; - pr_debug("acpi_cpufreq_early_init\n"); + pr_debug("%s\n", __func__); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); if (!acpi_perf_data) { @@ -657,7 +656,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) static int blacklisted; #endif - pr_debug("acpi_cpufreq_cpu_init\n"); + pr_debug("%s\n", __func__); #ifdef CONFIG_SMP if (blacklisted) @@ -856,7 +855,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = policy->driver_data; - pr_debug("acpi_cpufreq_cpu_exit\n"); + pr_debug("%s\n", __func__); policy->fast_switch_possible = false; policy->driver_data = NULL; @@ -881,7 +880,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = policy->driver_data; - pr_debug("acpi_cpufreq_resume\n"); + pr_debug("%s\n", __func__); data->resume = 1; @@ -954,7 +953,7 @@ static int __init acpi_cpufreq_init(void) if (cpufreq_get_current_driver()) return -EEXIST; - pr_debug("acpi_cpufreq_init\n"); + pr_debug("%s\n", __func__); ret = acpi_cpufreq_early_init(); if (ret) @@ -991,7 +990,7 @@ static int __init acpi_cpufreq_init(void) static void __exit acpi_cpufreq_exit(void) { - pr_debug("acpi_cpufreq_exit\n"); + pr_debug("%s\n", __func__); acpi_cpufreq_boost_exit(); From b23aa311fa1f376640069e5da897b4a6148d104f Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 16 Apr 2019 10:40:27 +0800 Subject: [PATCH 57/72] cpufreq: Remove needless bios_limit check in show_bios_limit() Initially, bios_limit attribute will be created if driver->bios_limit is set in cpufreq_add_dev_interface(). So remove the redundant check for latter show operation. Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3f235d5f67ee..df7d112fe621 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -894,11 +894,9 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) { unsigned int limit; int ret; - if (cpufreq_driver->bios_limit) { - ret = cpufreq_driver->bios_limit(policy->cpu, &limit); - if (!ret) - return sprintf(buf, "%u\n", limit); - } + ret = cpufreq_driver->bios_limit(policy->cpu, &limit); + if (!ret) + return sprintf(buf, "%u\n", limit); return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); } From e757e7fa3a93afa54a1bf31953f458b0005e0910 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 16 Apr 2019 12:23:05 -0400 Subject: [PATCH 58/72] PM / Domains: remove unnecessary unlikely() WARN_ON() already contains an unlikely(), so it's not necessary to use unlikely. Signed-off-by: Yangtao Li Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 96a6dc9d305c..598a4e02aee1 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -391,11 +391,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) if (unlikely(!genpd->set_performance_state)) return -EINVAL; - if (unlikely(!dev->power.subsys_data || - !dev->power.subsys_data->domain_data)) { - WARN_ON(1); + if (WARN_ON(!dev->power.subsys_data || + !dev->power.subsys_data->domain_data)) return -EINVAL; - } genpd_lock(genpd); From 4db7c34cb41a3d7ed7db7a84413d7c225278c36a Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Fri, 19 Apr 2019 14:27:58 +0800 Subject: [PATCH 59/72] cpufreq: Move ->get callback check outside of __cpufreq_get() Currently, __cpufreq_get() called by show_cpuinfo_cur_freq() will check the ->get callback. That is needless since the cpuinfo_cur_freq attribute will not be created if ->get is not set. So let's drop it in __cpufreq_get(). Also keep this check in cpufreq_get(). Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index df7d112fe621..92604afdeec4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1585,7 +1585,7 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) { unsigned int ret_freq = 0; - if (unlikely(policy_is_inactive(policy)) || !cpufreq_driver->get) + if (unlikely(policy_is_inactive(policy))) return ret_freq; ret_freq = cpufreq_driver->get(policy->cpu); @@ -1623,7 +1623,8 @@ unsigned int cpufreq_get(unsigned int cpu) if (policy) { down_read(&policy->rwsem); - ret_freq = __cpufreq_get(policy); + if (cpufreq_driver->get) + ret_freq = __cpufreq_get(policy); up_read(&policy->rwsem); cpufreq_cpu_put(policy); From 71b77697af9ef06b559875e4bd8dc3d141807c93 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 18 Apr 2019 12:27:55 +0200 Subject: [PATCH 60/72] PM / Domains: Don't kfree() the virtual device in the error path It's not correct to call kfree(dev) when device_register(dev) has failed. Fix this by calling put_device(dev) instead. Fixes: 3c095f32a92b ("PM / Domains: Add support for multi PM domains per device to genpd") Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Acked-by: Niklas Cassel Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 598a4e02aee1..5422fc01dca3 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2455,7 +2455,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, ret = device_register(virt_dev); if (ret) { - kfree(virt_dev); + put_device(virt_dev); return ERR_PTR(ret); } From e8b04de9da71b56dbbc5fa443d4ab52b617977bb Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 18 Apr 2019 12:27:56 +0200 Subject: [PATCH 61/72] PM / Domains: Allow OF lookup for multi PM domain case from ->attach_dev() A genpd provider that uses the ->attach_dev() callback to look up resources for a device fails to do so when the device has multiple PM domains attached. That is because when genpd invokes the ->attach_dev() callback, it passes the allocated virtual device as the in-parameter. To address this problem, simply assign the dev->of_node for the virtual device, based upon the original device's OF node. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Acked-by: Niklas Cassel Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5422fc01dca3..a0b021d53084 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2272,6 +2272,7 @@ EXPORT_SYMBOL_GPL(of_genpd_remove_last); static void genpd_release_dev(struct device *dev) { + of_node_put(dev->of_node); kfree(dev); } @@ -2333,14 +2334,14 @@ static void genpd_dev_pm_sync(struct device *dev) genpd_queue_power_off_work(pd); } -static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np, - unsigned int index, bool power_on) +static int __genpd_dev_pm_attach(struct device *dev, unsigned int index, + bool power_on) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; int ret; - ret = of_parse_phandle_with_args(np, "power-domains", + ret = of_parse_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells", index, &pd_args); if (ret < 0) return ret; @@ -2408,7 +2409,7 @@ int genpd_dev_pm_attach(struct device *dev) "#power-domain-cells") != 1) return 0; - return __genpd_dev_pm_attach(dev, dev->of_node, 0, true); + return __genpd_dev_pm_attach(dev, 0, true); } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); @@ -2452,6 +2453,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, dev_set_name(virt_dev, "genpd:%u:%s", index, dev_name(dev)); virt_dev->bus = &genpd_bus_type; virt_dev->release = genpd_release_dev; + virt_dev->of_node = of_node_get(dev->of_node); ret = device_register(virt_dev); if (ret) { @@ -2460,7 +2462,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, } /* Try to attach the device to the PM domain at the specified index. */ - ret = __genpd_dev_pm_attach(virt_dev, dev->of_node, index, false); + ret = __genpd_dev_pm_attach(virt_dev, index, false); if (ret < 1) { device_unregister(virt_dev); return ret ? 
ERR_PTR(ret) : NULL; From 3ccf3f0cd1971e007680114ff732e8a717aafbf8 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 18 Apr 2019 12:27:57 +0200 Subject: [PATCH 62/72] PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain If a call to dev_pm_domain_attach() succeeds in attaching a device to its single PM domain, the important point is to prevent subsequent dev_pm_domain_attach_by_name|id() calls from failing. That is done by checking the dev->pm_domain pointer and then returning -EEXIST, rather than continuing to call genpd_dev_pm_attach_by_id|name(). For this reason, enable genpd_dev_pm_attach_by_id|name() to be used for single PM domains too. This simplifies future users, so they only need to use dev_pm_domain_attach_by_id|name() instead of having to combine it with dev_pm_domain_attach(). Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Acked-by: Niklas Cassel Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index a0b021d53084..d97bcf6918fb 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2439,10 +2439,10 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, if (!dev->of_node) return NULL; - /* Deal only with devices using multiple PM domains. */ + /* Verify that the index is within a valid range. */ num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells"); - if (num_domains < 2 || index >= num_domains) + if (index >= num_domains) return NULL; /* Allocate and register device on the genpd bus. */ From 4235a5947a16241be1ea331fe60e79e03fa5faa1 Mon Sep 17 00:00:00 2001 From: Yuantian Tang Date: Wed, 24 Apr 2019 10:32:23 +0800 Subject: [PATCH 63/72] cpufreq: qoriq: Add ls1028a chip support Enable the cpufreq feature on the ls1028a chip by adding its compatible string. 
Signed-off-by: Yuantian Tang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qoriq-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 4295e5476264..d308c4de467d 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -280,6 +280,7 @@ static const struct of_device_id node_matches[] __initconst = { { .compatible = "fsl,ls1012a-clockgen", }, { .compatible = "fsl,ls1021a-clockgen", }, + { .compatible = "fsl,ls1028a-clockgen", }, { .compatible = "fsl,ls1043a-clockgen", }, { .compatible = "fsl,ls1046a-clockgen", }, { .compatible = "fsl,ls1088a-clockgen", }, From 51dcf7482f4b3166ef477d44181648110d501d25 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:10 +0200 Subject: [PATCH 64/72] PM / Domains: Use the base device for driver_deferred_probe_check_state() When genpd fails to attach a device to one of its multiple PM domains, we end up calling driver_deferred_probe_check_state() for the recently allocated virtual device. This is incorrect, as it's the base device that is being probed. Fix this by passing along the base device to __genpd_dev_pm_attach() and use that instead. Fixes: e01afc325025 ("PM / Domains: Stop deferring probe at the end of initcall") Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 8362dfe187f5..8aca1c9b4406 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2405,8 +2405,8 @@ static void genpd_dev_pm_sync(struct device *dev) genpd_queue_power_off_work(pd); } -static int __genpd_dev_pm_attach(struct device *dev, unsigned int index, - bool power_on) +static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, + unsigned int index, bool power_on) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; @@ -2424,7 +2424,7 @@ static int __genpd_dev_pm_attach(struct device *dev, unsigned int index, mutex_unlock(&gpd_list_lock); dev_dbg(dev, "%s() failed to find PM domain: %ld\n", __func__, PTR_ERR(pd)); - return driver_deferred_probe_check_state(dev); + return driver_deferred_probe_check_state(base_dev); } dev_dbg(dev, "adding to PM domain %s\n", pd->name); @@ -2480,7 +2480,7 @@ int genpd_dev_pm_attach(struct device *dev) "#power-domain-cells") != 1) return 0; - return __genpd_dev_pm_attach(dev, 0, true); + return __genpd_dev_pm_attach(dev, dev, 0, true); } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); @@ -2533,7 +2533,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, } /* Try to attach the device to the PM domain at the specified index. */ - ret = __genpd_dev_pm_attach(virt_dev, index, false); + ret = __genpd_dev_pm_attach(virt_dev, dev, index, false); if (ret < 1) { device_unregister(virt_dev); return ret ? ERR_PTR(ret) : NULL; From a174920d69db9be80332f535db4a8ca2e3bccf24 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:11 +0200 Subject: [PATCH 65/72] PM / Domains: Drop unused in-parameter to some genpd functions Both genpd_alloc_dev_data() and genpd_add_device(), that are internal genpd functions, allow a struct gpd_timing_data *td to be passed as an in-parameter. 
However, as NULL is always passed, let's just drop the in-parameter altogether. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 8aca1c9b4406..93298b7db408 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1396,8 +1396,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #endif /* CONFIG_PM_SLEEP */ -static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, - struct gpd_timing_data *td) +static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) { struct generic_pm_domain_data *gpd_data; int ret; @@ -1412,9 +1411,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, goto err_put; } - if (td) - gpd_data->td = *td; - gpd_data->base.dev = dev; gpd_data->td.constraint_changed = true; gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; @@ -1504,8 +1500,7 @@ static void genpd_clear_cpumask(struct generic_pm_domain *genpd, genpd_update_cpumask(genpd, dev, false); } -static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, - struct gpd_timing_data *td) +static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { struct generic_pm_domain_data *gpd_data; int ret; @@ -1515,7 +1510,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - gpd_data = genpd_alloc_dev_data(dev, td); + gpd_data = genpd_alloc_dev_data(dev); if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); @@ -1553,7 +1548,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) int ret; mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev, NULL); + ret = genpd_add_device(genpd, dev); 
mutex_unlock(&gpd_list_lock); return ret; @@ -2259,7 +2254,7 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) goto out; } - ret = genpd_add_device(genpd, dev, NULL); + ret = genpd_add_device(genpd, dev); out: mutex_unlock(&gpd_list_lock); @@ -2429,7 +2424,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev_dbg(dev, "adding to PM domain %s\n", pd->name); - ret = genpd_add_device(pd, dev, NULL); + ret = genpd_add_device(pd, dev); mutex_unlock(&gpd_list_lock); if (ret < 0) { From b24e196586fecafed1c3cff9b2f87c1a64138ade Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:12 +0200 Subject: [PATCH 66/72] PM / Domains: Search for the CPU device outside the genpd lock While attaching/detaching a device to a PM domain (genpd) with GENPD_FLAG_CPU_DOMAIN set, genpd iterates the cpu_possible_mask to check whether or not the device corresponds to a CPU. This iteration is done while holding the genpd's lock, which is unnecessary. Avoid the locking by restructuring the corresponding code a bit. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 52 +++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 93298b7db408..da1c99178943 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1450,8 +1450,8 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } -static void __genpd_update_cpumask(struct generic_pm_domain *genpd, - int cpu, bool set, unsigned int depth) +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) { struct gpd_link *link; @@ -1462,7 +1462,7 @@ static void __genpd_update_cpumask(struct generic_pm_domain *genpd, struct generic_pm_domain *master = link->master; genpd_lock_nested(master, depth + 1); - __genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_update_cpumask(master, cpu, set, depth + 1); genpd_unlock(master); } @@ -1472,38 +1472,37 @@ static void __genpd_update_cpumask(struct generic_pm_domain *genpd, cpumask_clear_cpu(cpu, genpd->cpus); } -static void genpd_update_cpumask(struct generic_pm_domain *genpd, - struct device *dev, bool set) +static void genpd_set_cpumask(struct generic_pm_domain *genpd, int cpu) +{ + if (cpu >= 0) + genpd_update_cpumask(genpd, cpu, true, 0); +} + +static void genpd_clear_cpumask(struct generic_pm_domain *genpd, int cpu) +{ + if (cpu >= 0) + genpd_update_cpumask(genpd, cpu, false, 0); +} + +static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) { int cpu; if (!genpd_is_cpu_domain(genpd)) - return; + return -1; for_each_possible_cpu(cpu) { - if (get_cpu_device(cpu) == dev) { - __genpd_update_cpumask(genpd, cpu, set, 0); - return; - } + if (get_cpu_device(cpu) == dev) + return cpu; } -} -static void genpd_set_cpumask(struct generic_pm_domain *genpd, - struct device *dev) -{ - genpd_update_cpumask(genpd, dev, true); -} - -static void genpd_clear_cpumask(struct 
generic_pm_domain *genpd, - struct device *dev) -{ - genpd_update_cpumask(genpd, dev, false); + return -1; } static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { struct generic_pm_domain_data *gpd_data; - int ret; + int ret, cpu; dev_dbg(dev, "%s()\n", __func__); @@ -1514,13 +1513,15 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); + cpu = genpd_get_cpu(genpd, dev); + ret = genpd->attach_dev ? genpd->attach_dev(genpd, dev) : 0; if (ret) goto out; genpd_lock(genpd); - genpd_set_cpumask(genpd, dev); + genpd_set_cpumask(genpd, cpu); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1560,13 +1561,14 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int ret = 0; + int cpu, ret = 0; dev_dbg(dev, "%s()\n", __func__); pdd = dev->power.subsys_data->domain_data; gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + cpu = genpd_get_cpu(genpd, dev); genpd_lock(genpd); @@ -1578,7 +1580,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; - genpd_clear_cpumask(genpd, dev); + genpd_clear_cpumask(genpd, cpu); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); From f9ccd7c3a1d87cea3a6f9ed6c946dee9e7456b2e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:13 +0200 Subject: [PATCH 67/72] PM / Domains: Allow to attach a CPU via genpd_dev_pm_attach_by_id|name() Attaching a device via genpd_dev_pm_attach_by_id|name() makes genpd allocate a virtual device that it attaches instead. This leads to a problem in case when the base device belongs to a CPU. More precisely, it means genpd_get_cpu() compares against the virtual device, thus it fails to find a matching CPU device. 
Address this limitation by passing the base device to genpd_get_cpu() rather than the virtual device. Moreover, to deal with detach correctly from genpd_remove_device(), store the CPU number in struct generic_pm_domain_data, so as to be able to clear the corresponding bit in the cpumask for the genpd. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 20 ++++++++++---------- include/linux/pm_domain.h | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index da1c99178943..3d899e8abd58 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1499,10 +1499,11 @@ static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) return -1; } -static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) +static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, + struct device *base_dev) { struct generic_pm_domain_data *gpd_data; - int ret, cpu; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -1513,7 +1514,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - cpu = genpd_get_cpu(genpd, dev); + gpd_data->cpu = genpd_get_cpu(genpd, base_dev); ret = genpd->attach_dev ? 
genpd->attach_dev(genpd, dev) : 0; if (ret) @@ -1521,7 +1522,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) genpd_lock(genpd); - genpd_set_cpumask(genpd, cpu); + genpd_set_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1549,7 +1550,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) int ret; mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev); + ret = genpd_add_device(genpd, dev, dev); mutex_unlock(&gpd_list_lock); return ret; @@ -1561,14 +1562,13 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int cpu, ret = 0; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); pdd = dev->power.subsys_data->domain_data; gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); - cpu = genpd_get_cpu(genpd, dev); genpd_lock(genpd); @@ -1580,7 +1580,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; - genpd_clear_cpumask(genpd, cpu); + genpd_clear_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -2256,7 +2256,7 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) goto out; } - ret = genpd_add_device(genpd, dev); + ret = genpd_add_device(genpd, dev, dev); out: mutex_unlock(&gpd_list_lock); @@ -2426,7 +2426,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev_dbg(dev, "adding to PM domain %s\n", pd->name); - ret = genpd_add_device(pd, dev); + ret = genpd_add_device(pd, dev, base_dev); mutex_unlock(&gpd_list_lock); if (ret < 0) { diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index bc82e74560ee..0e8e356bed6a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -175,6 +175,7 @@ struct generic_pm_domain_data { struct 
pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + int cpu; unsigned int performance_state; void *data; }; From c208ac8f8f862dba7b01eb54557f4803b3c17296 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Apr 2019 16:11:37 +0200 Subject: [PATCH 68/72] x86: tsc: Rework time_cpufreq_notifier() There are problems with running time_cpufreq_notifier() on SMP systems. First off, the rdtsc() called from there runs on the CPU executing that code and not necessarily on the CPU whose sched_clock() rate is updated which is questionable at best. Second, in the cases when the frequencies of all CPUs in an SMP system are always in sync, it is not sufficient to update just one of them or the set associated with a given cpufreq policy on frequency changes - all CPUs in the system should be updated and that would require more than a simple transition notifier. Note, however, that the underlying issue (the TSC rate depending on the CPU frequency) has not been present in hardware shipping for the last few years and in quite a few relevant cases (acpi-cpufreq in particular) running time_cpufreq_notifier() will cause the TSC to be marked as unstable anyway. For this reason, make time_cpufreq_notifier() simply mark the TSC as unstable and give up when run on SMP and only try to carry out any adjustments otherwise. Signed-off-by: Rafael J. Wysocki Reviewed-by: Viresh Kumar --- arch/x86/kernel/tsc.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3fae23834069..cc6df5c6d7b3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void) /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) + * speed as the bootup CPU. 
*/ static void __init cyc2ns_init_secondary_cpus(void) { @@ -937,12 +936,12 @@ void tsc_restore_sched_clock_state(void) } #ifdef CONFIG_CPU_FREQ -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency +/* + * Frequency scaling support. Adjust the TSC based timer when the CPU frequency * changes. * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. + * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC + * as unstable and give up in those cases. * * Should fix up last_tsc too. Currently gettimeofday in the * first tick after the change will be slightly wrong. @@ -956,22 +955,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj; - lpj = &boot_cpu_data.loops_per_jiffy; -#ifdef CONFIG_SMP - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#endif + if (num_online_cpus() > 1) { + mark_tsc_unstable("cpufreq changes on SMP"); + return 0; + } if (!ref_freq) { ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; tsc_khz_ref = tsc_khz; } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + boot_cpu_data.loops_per_jiffy = + cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) From 712e9ad0a2bd03e6685e9154985129d3f90efb6f Mon Sep 17 00:00:00 2001 From: Vabhav Sharma Date: Fri, 26 Apr 2019 06:55:55 +0000 Subject: [PATCH 69/72] cpufreq: qoriq: add support for lx2160a Enable support of NXP SoC lx2160a to handle the lx2160a SoC. 
Signed-off-by: Tang Yuantian Signed-off-by: Yogesh Gaur Signed-off-by: Vabhav Sharma Acked-by: Scott Wood Acked-by: Stephen Boyd Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qoriq-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index d308c4de467d..71b640c8c1a5 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -285,6 +285,7 @@ static const struct of_device_id node_matches[] __initconst = { { .compatible = "fsl,ls1046a-clockgen", }, { .compatible = "fsl,ls1088a-clockgen", }, { .compatible = "fsl,ls2080a-clockgen", }, + { .compatible = "fsl,lx2160a-clockgen", }, { .compatible = "fsl,p4080-clockgen", }, { .compatible = "fsl,qoriq-clockgen-1.0", }, { .compatible = "fsl,qoriq-clockgen-2.0", }, From 75b0f8473fed6941e0ae01da082ec8b3d58adb9b Mon Sep 17 00:00:00 2001 From: dongjian Date: Sun, 28 Apr 2019 16:54:17 +0800 Subject: [PATCH 70/72] cpufreq: centrino: Fix centrino_setpolicy() kerneldoc comment The code is using centrino_target() rather than centrino_setpolicy(). Signed-off-by: dongjian Acked-by: Viresh Kumar [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/speedstep-centrino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index a1fb735685db..e086b2dd4072 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -412,7 +412,7 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) } /** - * centrino_setpolicy - set a new CPUFreq policy + * centrino_target - set a new CPUFreq policy * @policy: new policy * @index: index of target frequency * From 8db82563451f976597ab7b282ec655e4390a4088 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 8 Mar 2019 17:47:10 +0100 Subject: [PATCH 71/72] cpufreq: armada-37xx: fix frequency calculation for opp The frequency calculation was based on the current (max) frequency of the CPU. However, for low frequency, the value used was already the parent frequency divided by a factor of 2. Instead of using this frequency, this fix directly gets the frequency from the parent clock.
Fixes: 92ce45fb875d ("cpufreq: Add DVFS support for Armada 37xx") Cc: Reported-by: Christian Neubert Signed-off-by: Gregory CLEMENT Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 75491fc841a6..0df16eb1eb3c 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -359,11 +359,11 @@ static int __init armada37xx_cpufreq_driver_init(void) struct armada_37xx_dvfs *dvfs; struct platform_device *pdev; unsigned long freq; - unsigned int cur_frequency; + unsigned int cur_frequency, base_frequency; struct regmap *nb_pm_base, *avs_base; struct device *cpu_dev; int load_lvl, ret; - struct clk *clk; + struct clk *clk, *parent; nb_pm_base = syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm"); @@ -399,6 +399,22 @@ static int __init armada37xx_cpufreq_driver_init(void) return PTR_ERR(clk); } + parent = clk_get_parent(clk); + if (IS_ERR(parent)) { + dev_err(cpu_dev, "Cannot get parent clock for CPU0\n"); + clk_put(clk); + return PTR_ERR(parent); + } + + /* Get parent CPU frequency */ + base_frequency = clk_get_rate(parent); + + if (!base_frequency) { + dev_err(cpu_dev, "Failed to get parent clock rate for CPU\n"); + clk_put(clk); + return -EINVAL; + } + /* Get nominal (current) CPU frequency */ cur_frequency = clk_get_rate(clk); if (!cur_frequency) { @@ -431,7 +447,7 @@ static int __init armada37xx_cpufreq_driver_init(void) for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000; - freq = cur_frequency / dvfs->divider[load_lvl]; + freq = base_frequency / dvfs->divider[load_lvl]; ret = dev_pm_opp_add(cpu_dev, freq, u_volt); if (ret) goto remove_opp; From 4ebe36c94aed95de71a8ce6a6762226d31c938ee Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 30 Apr 
2019 11:35:52 +0530 Subject: [PATCH 72/72] cpufreq: Fix kobject memleak Currently the error return path from kobject_init_and_add() is not followed by a call to kobject_put() - which means we are leaking the kobject. Fix it by adding a call to kobject_put() in the error path of kobject_init_and_add(). Signed-off-by: Viresh Kumar Reviewed-by: Tobin C. Harding Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 + drivers/cpufreq/cpufreq_governor.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 92604afdeec4..7ea217c88c2e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1133,6 +1133,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) cpufreq_global_kobject, "policy%u", cpu); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + kobject_put(&policy->kobj); goto err_free_real_cpus; } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ffa9adeaba31..9d1d9bf02710 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -459,6 +459,8 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) /* Failure, so roll back. */ pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); + kobject_put(&dbs_data->attr_set.kobj); + policy->governor_data = NULL; if (!have_governor_per_policy())