From 108cc2e7d212c7d52694fb400423da807e1e5fe4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 30 Apr 2013 12:42:50 +0300 Subject: [PATCH 01/46] SFI: fix compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/sfi/sfi_core.c:164:26: warning: no previous prototype for ‘sfi_map_table’ [-Wmissing-prototypes] drivers/sfi/sfi_core.c:192:6: warning: no previous prototype for ‘sfi_unmap_table’ [-Wmissing-prototypes] Signed-off-by: Andy Shevchenko Signed-off-by: Len Brown --- drivers/sfi/sfi_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c index 1e824fb1649b..296db7a69c27 100644 --- a/drivers/sfi/sfi_core.c +++ b/drivers/sfi/sfi_core.c @@ -161,7 +161,7 @@ static int sfi_verify_table(struct sfi_table_header *table) * Check for common case that we can re-use mapping to SYST, * which requires syst_pa, syst_va to be initialized. */ -struct sfi_table_header *sfi_map_table(u64 pa) +static struct sfi_table_header *sfi_map_table(u64 pa) { struct sfi_table_header *th; u32 length; @@ -189,7 +189,7 @@ struct sfi_table_header *sfi_map_table(u64 pa) * Undoes effect of sfi_map_table() by unmapping table * if it did not completely fit on same page as SYST. */ -void sfi_unmap_table(struct sfi_table_header *th) +static void sfi_unmap_table(struct sfi_table_header *th) { if (!TABLE_ON_PAGE(syst_va, th, th->len)) sfi_unmap_memory(th, TABLE_ON_PAGE(th, th, th->len) ? From e7ddf4b7b3c5fe64902c4fb9edac92532c87cd75 Mon Sep 17 00:00:00 2001 From: Srinidhi Kasagar Date: Fri, 19 Dec 2014 23:13:51 +0530 Subject: [PATCH 02/46] cpufreq: Add SFI based cpufreq driver support This adds the SFI based cpu freq driver for some of the Intel's Silvermont based Atom architectures like Z34xx and Z35xx. Signed-off-by: Rudramuni, Vishwesh M Signed-off-by: Srinidhi Kasagar Acked-by: Viresh Kumar Signed-off-by: Len Brown --- arch/x86/include/uapi/asm/msr-index.h | 1 + drivers/cpufreq/Kconfig.x86 | 10 ++ drivers/cpufreq/Makefile | 1 + drivers/cpufreq/sfi-cpufreq.c | 136 ++++++++++++++++++++++++++ 4 files changed, 148 insertions(+) create mode 100644 drivers/cpufreq/sfi-cpufreq.c diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index e21331ce368f..4f6dae67dd10 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -318,6 +318,7 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define INTEL_PERF_CTL_MASK 0xffff #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PERF_CTL 0xc0010062 diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 89ae88f91895..c59bdcb83217 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -57,6 +57,16 @@ config X86_ACPI_CPUFREQ_CPB By enabling this option the acpi_cpufreq driver provides the old entry in addition to the new boost ones, for compatibility reasons. +config X86_SFI_CPUFREQ + tristate "SFI Performance-States driver" + depends on X86_INTEL_MID && SFI + help + This adds a CPUFreq driver for some Silvermont based Intel Atom + architectures like Z34xx and Z35xx which enumerate processor + performance states through SFI. + + If in doubt, say N. + config ELAN_CPUFREQ tristate "AMD Elan SC400 and SC410" depends on MELAN diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 40c53dc1937e..2e50f55e14d3 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o obj-$(CONFIG_X86_INTEL_PSTATE) += intel_pstate.o obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o +obj-$(CONFIG_X86_SFI_CPUFREQ) += sfi-cpufreq.o ################################################################################## # ARM SoC drivers diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c new file mode 100644 index 000000000000..ffa3389e535b --- /dev/null +++ b/drivers/cpufreq/sfi-cpufreq.c @@ -0,0 +1,136 @@ +/* + * SFI Performance States Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Author: Vishwesh M Rudramuni + * Author: Srinidhi Kasagar + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +struct cpufreq_frequency_table *freq_table; +static struct sfi_freq_table_entry *sfi_cpufreq_array; +static int num_freq_table_entries; + +static int sfi_parse_freq(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_freq_table_entry *pentry; + int totallen; + + sb = (struct sfi_table_simple *)table; + num_freq_table_entries = SFI_GET_NUM_ENTRIES(sb, + struct sfi_freq_table_entry); + if (num_freq_table_entries <= 1) { + pr_err("No p-states discovered\n"); + return -ENODEV; + } + + pentry = (struct sfi_freq_table_entry *)sb->pentry; + totallen = num_freq_table_entries * sizeof(*pentry); + + sfi_cpufreq_array = kzalloc(totallen, GFP_KERNEL); + if (!sfi_cpufreq_array) + return -ENOMEM; + + memcpy(sfi_cpufreq_array, pentry, totallen); + + return 0; +} + +static int sfi_cpufreq_target(struct cpufreq_policy *policy, unsigned int index) +{ + unsigned int next_perf_state = 0; /* Index into perf table */ + u32 lo, hi; + + next_perf_state = policy->freq_table[index].driver_data; + + rdmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, &lo, &hi); + lo = (lo & ~INTEL_PERF_CTL_MASK) | + ((u32) sfi_cpufreq_array[next_perf_state].ctrl_val & + INTEL_PERF_CTL_MASK); + wrmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, lo, hi); + + return 0; +} + +static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + policy->shared_type = CPUFREQ_SHARED_TYPE_HW; + policy->cpuinfo.transition_latency = 100000; /* 100us */ + + return cpufreq_table_validate_and_show(policy, freq_table); +} + +static struct cpufreq_driver sfi_cpufreq_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = sfi_cpufreq_target, + .init = sfi_cpufreq_cpu_init, + .name = "sfi-cpufreq", + .attr = cpufreq_generic_attr, +}; + +static int __init sfi_cpufreq_init(void) +{ + int ret, i; + + /* parse the freq table from SFI */ + ret = sfi_table_parse(SFI_SIG_FREQ, NULL, NULL, sfi_parse_freq); + if (ret) + return ret; + + freq_table = kzalloc(sizeof(*freq_table) * + (num_freq_table_entries + 1), GFP_KERNEL); + if (!freq_table) { + ret = -ENOMEM; + goto err_free_array; + } + + for (i = 0; i < num_freq_table_entries; i++) { + freq_table[i].driver_data = i; + freq_table[i].frequency = sfi_cpufreq_array[i].freq_mhz * 1000; + } + freq_table[i].frequency = CPUFREQ_TABLE_END; + + ret = cpufreq_register_driver(&sfi_cpufreq_driver); + if (ret) + goto err_free_tbl; + + return ret; + +err_free_tbl: + kfree(freq_table); +err_free_array: + kfree(sfi_cpufreq_array); + return ret; +} +late_initcall(sfi_cpufreq_init); + +static void __exit sfi_cpufreq_exit(void) +{ + cpufreq_unregister_driver(&sfi_cpufreq_driver); + kfree(freq_table); + kfree(sfi_cpufreq_array); +} +module_exit(sfi_cpufreq_exit); + +MODULE_AUTHOR("Vishwesh M Rudramuni "); +MODULE_DESCRIPTION("SFI Performance-States Driver"); +MODULE_LICENSE("GPL"); From a6a919b6ae7c438c89c4c7349c8bc1e48a30ef4d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 Dec 2014 22:14:26 +0100 Subject: [PATCH 03/46] cpufreq: drop owner assignment from platform_drivers This platform_driver does not need to set an owner, it will be populated by the driver core. Signed-off-by: Wolfram Sang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/ls1x-cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/ls1x-cpufreq.c b/drivers/cpufreq/ls1x-cpufreq.c index 25fbd6a1374f..f0913eee2f50 100644 --- a/drivers/cpufreq/ls1x-cpufreq.c +++ b/drivers/cpufreq/ls1x-cpufreq.c @@ -210,7 +210,6 @@ out: static struct platform_driver ls1x_cpufreq_platdrv = { .driver = { .name = "ls1x-cpufreq", - .owner = THIS_MODULE, }, .probe = ls1x_cpufreq_probe, .remove = ls1x_cpufreq_remove, From 90de2a4aa9f355b55e57eaf2fd584897dd0adf8c Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Tue, 23 Dec 2014 22:09:48 -0800 Subject: [PATCH 04/46] cpufreq: suspend cpufreq governors on shutdown We should stop cpufreq governors when we shut down the system. If we don't do this, we can end up with this deadlock: 1. cpufreq governor may be running on a CPU other than CPU0. 2. In machine_restart() we call smp_send_stop() which stops CPUs. If one of these CPUs was actively running a cpufreq governor then it may have the mutex / spinlock needed to access the main PMIC in the system (perhaps over I2C) 3. If a machine needs access to the main PMIC in order to shutdown then it will never get it since the mutex was lost when the other CPU stopped. 4. We'll hang (possibly eventually hitting the hard lockup detector). Let's avoid the problem by stopping the cpufreq governor at shutdown, which is a sensible thing to do anyway. Signed-off-by: Doug Anderson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 46bed4f81cde..dad7235971ba 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -2556,6 +2557,14 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) } EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); +/* + * Stop cpufreq at shutdown to make sure it isn't holding any locks + * or mutexes when secondary CPUs are halted. + */ +static struct syscore_ops cpufreq_syscore_ops = { + .shutdown = cpufreq_suspend, +}; + static int __init cpufreq_core_init(void) { if (cpufreq_disabled()) @@ -2564,6 +2573,8 @@ static int __init cpufreq_core_init(void) cpufreq_global_kobject = kobject_create(); BUG_ON(!cpufreq_global_kobject); + register_syscore_ops(&cpufreq_syscore_ops); + return 0; } core_initcall(cpufreq_core_init); From e673f1639bc565ce3509ed36bc3987c2af407cd2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:22 +0530 Subject: [PATCH 05/46] cpufreq: remove dangling comment It doesn't make any sense at all and is a leftover of some earlier commit. Remove it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index dad7235971ba..a95cdc5aa3c5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2007,10 +2007,6 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_target); -/* - * when "event" is CPUFREQ_GOV_LIMITS - */ - static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { From f13f1184a1f4e046306c33e542b4d654866e5d70 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:23 +0530 Subject: [PATCH 06/46] cpufreq: remove extra parenthesis We can live without it and so we should. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a95cdc5aa3c5..a180bb40054c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -903,7 +903,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; - while ((drv_attr) && (*drv_attr)) { + while (drv_attr && *drv_attr) { ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); if (ret) return ret; From 09347b2905169b361e8ff5d75308982165d61c13 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:24 +0530 Subject: [PATCH 07/46] cpufreq: don't need line break in show_scaling_cur_freq() No need of an unnecessary line break. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a180bb40054c..4c70f7aa9b1e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -514,8 +514,7 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -static ssize_t show_scaling_cur_freq( - struct cpufreq_policy *policy, char *buf) +static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) { ssize_t ret; From db5f299574267c50067b967f898633d4dec7ecc4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:25 +0530 Subject: [PATCH 08/46] cpufreq: merge 'if' blocks in __cpufreq_remove_dev_prepare() There are two 'if' blocks here, checking for !cpufreq_driver->setpolicy and has_target(). Both are actually doing the same thing, merge them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 4c70f7aa9b1e..626ef841479a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1371,11 +1371,10 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, pr_err("%s: Failed to stop governor\n", __func__); return ret; } - } - if (!cpufreq_driver->setpolicy) strncpy(per_cpu(cpufreq_cpu_governor, cpu), policy->governor->name, CPUFREQ_NAME_LEN); + } down_read(&policy->rwsem); cpus = cpumask_weight(policy->cpus); From 42f91fa1166f1d6c9fba37f167dd3c3f846be00a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:26 +0530 Subject: [PATCH 09/46] cpufreq: s/__find_governor/find_governor Remove unnecessary from find_governor's name. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 626ef841479a..faffdaaba343 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -433,7 +433,7 @@ static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, } define_one_global_rw(boost); -static struct cpufreq_governor *__find_governor(const char *str_governor) +static struct cpufreq_governor *find_governor(const char *str_governor) { struct cpufreq_governor *t; @@ -469,7 +469,7 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, mutex_lock(&cpufreq_governor_mutex); - t = __find_governor(str_governor); + t = find_governor(str_governor); if (t == NULL) { int ret; @@ -479,7 +479,7 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, mutex_lock(&cpufreq_governor_mutex); if (ret == 0) - t = __find_governor(str_governor); + t = find_governor(str_governor); } if (t != NULL) { @@ -936,7 +936,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) memcpy(&new_policy, policy, sizeof(*policy)); /* Update governor of new_policy to the governor used before hotplug */ - gov = __find_governor(per_cpu(cpufreq_cpu_governor, policy->cpu)); + gov = find_governor(per_cpu(cpufreq_cpu_governor, policy->cpu)); if (gov) pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); @@ -2102,7 +2102,7 @@ int cpufreq_register_governor(struct cpufreq_governor *governor) governor->initialized = 0; err = -EBUSY; - if (__find_governor(governor->name) == NULL) { + if (!find_governor(governor->name)) { err = 0; list_add(&governor->governor_list, &cpufreq_governor_list); } From 2e1cc3a5d7efa42d3aa4aaa77b9b2c3a51bb6073 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:27 +0530 Subject: [PATCH 10/46] cpufreq: No need to check for has_target() Either we can be setpolicy or target type, nothing else. And so the else part of setpolicy will automatically be of has_target() type. And so we don't need to check it again. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index faffdaaba343..1454ab3f0023 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -464,7 +464,7 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, *policy = CPUFREQ_POLICY_POWERSAVE; err = 0; } - } else if (has_target()) { + } else { struct cpufreq_governor *t; mutex_lock(&cpufreq_governor_mutex); From a1e1dc41c4efb41e1e402a6b5a45b75687030664 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:28 +0530 Subject: [PATCH 11/46] cpufreq: pass policy to cpufreq_out_of_sync There is no point finding out the 'policy' again within cpufreq_out_of_sync() when all the callers already have it. Just make them pass policy instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1454ab3f0023..0a9e764a6e2d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1509,30 +1509,23 @@ static void handle_update(struct work_struct *work) /** * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're * in deep trouble. - * @cpu: cpu number - * @old_freq: CPU frequency the kernel thinks the CPU runs at + * @policy: policy managing CPUs * @new_freq: CPU frequency the CPU actually runs at * * We adjust to current frequency first, and need to clean up later. * So either call to cpufreq_update_policy() or schedule handle_update()). */ -static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, +static void cpufreq_out_of_sync(struct cpufreq_policy *policy, unsigned int new_freq) { - struct cpufreq_policy *policy; struct cpufreq_freqs freqs; - unsigned long flags; pr_debug("Warning: CPU frequency out of sync: cpufreq and timing core thinks of %u, is %u kHz\n", - old_freq, new_freq); + policy->cur, new_freq); - freqs.old = old_freq; + freqs.old = policy->cur; freqs.new = new_freq; - read_lock_irqsave(&cpufreq_driver_lock, flags); - policy = per_cpu(cpufreq_cpu_data, cpu); - read_unlock_irqrestore(&cpufreq_driver_lock, flags); - cpufreq_freq_transition_begin(policy, &freqs); cpufreq_freq_transition_end(policy, &freqs, 0); } @@ -1597,7 +1590,7 @@ static unsigned int __cpufreq_get(unsigned int cpu) /* verify no discrepancy between actual and saved value exists */ if (unlikely(ret_freq != policy->cur)) { - cpufreq_out_of_sync(cpu, policy->cur, ret_freq); + cpufreq_out_of_sync(policy, ret_freq); schedule_work(&policy->update); } } @@ -2302,8 +2295,7 @@ int cpufreq_update_policy(unsigned int cpu) policy->cur = new_policy.cur; } else { if (policy->cur != new_policy.cur && has_target()) - cpufreq_out_of_sync(cpu, policy->cur, - new_policy.cur); + cpufreq_out_of_sync(policy, new_policy.cur); } } From d92d50a4622b55cf114033309286696b3bd0fa5b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:29 +0530 Subject: [PATCH 12/46] cpufreq: pass policy to __cpufreq_get() There is no point finding out the 'policy' again within __cpufreq_get() when all the callers already have it. Just make them pass policy instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0a9e764a6e2d..420f270a0475 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -63,7 +63,7 @@ static DECLARE_RWSEM(cpufreq_rwsem); /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); -static unsigned int __cpufreq_get(unsigned int cpu); +static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static void handle_update(struct work_struct *work); /** @@ -563,7 +563,7 @@ store_one(scaling_max_freq, max); static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, char *buf) { - unsigned int cur_freq = __cpufreq_get(policy->cpu); + unsigned int cur_freq = __cpufreq_get(policy); if (!cur_freq) return sprintf(buf, ""); return sprintf(buf, "%u\n", cur_freq); @@ -1575,15 +1575,14 @@ unsigned int cpufreq_quick_get_max(unsigned int cpu) } EXPORT_SYMBOL(cpufreq_quick_get_max); -static unsigned int __cpufreq_get(unsigned int cpu) +static unsigned int __cpufreq_get(struct cpufreq_policy *policy) { - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); unsigned int ret_freq = 0; if (!cpufreq_driver->get) return ret_freq; - ret_freq = cpufreq_driver->get(cpu); + ret_freq = cpufreq_driver->get(policy->cpu); if (ret_freq && policy->cur && !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { @@ -1611,7 +1610,7 @@ unsigned int cpufreq_get(unsigned int cpu) if (policy) { down_read(&policy->rwsem); - ret_freq = __cpufreq_get(cpu); + ret_freq = __cpufreq_get(policy); up_read(&policy->rwsem); cpufreq_cpu_put(policy); From bc68b7dfda54373fb5d0cee8c44a070d590b8433 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:30 +0530 Subject: [PATCH 13/46] cpufreq: update driver_data->flags only if we are registering driver We should first check if a cpufreq driver is already registered or not before updating driver_data->flags. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 420f270a0475..0247da2a3350 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2440,9 +2440,6 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) pr_debug("trying to register driver %s\n", driver_data->name); - if (driver_data->setpolicy) - driver_data->flags |= CPUFREQ_CONST_LOOPS; - write_lock_irqsave(&cpufreq_driver_lock, flags); if (cpufreq_driver) { write_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -2451,6 +2448,9 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + if (driver_data->setpolicy) + driver_data->flags |= CPUFREQ_CONST_LOOPS; + if (cpufreq_boost_supported()) { /* * Check if driver provides function to enable boost - From 22a7cfb01415cfa4284af20555942c427af452e0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:31 +0530 Subject: [PATCH 14/46] cpufreq: get rid of CONFIG_{HOTPLUG_CPU|SMP} mess These are messing up more than the benefit they provide. It isn't a lot of code anyway, that we will compile without them. Kill them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0247da2a3350..1e72621ca157 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -958,7 +958,6 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) } } -#ifdef CONFIG_HOTPLUG_CPU static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, struct device *dev) { @@ -996,7 +995,6 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); } -#endif static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) { @@ -1107,19 +1105,15 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { unsigned int j, cpu = dev->id; int ret = -ENOMEM; - struct cpufreq_policy *policy; + struct cpufreq_policy *policy, *tpolicy; unsigned long flags; bool recover_policy = cpufreq_suspended; -#ifdef CONFIG_HOTPLUG_CPU - struct cpufreq_policy *tpolicy; -#endif if (cpu_is_offline(cpu)) return 0; pr_debug("adding CPU %u\n", cpu); -#ifdef CONFIG_SMP /* check whether a different CPU already registered this * CPU because it is in the same boat. */ policy = cpufreq_cpu_get(cpu); @@ -1127,12 +1121,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) cpufreq_cpu_put(policy); return 0; } -#endif if (!down_read_trylock(&cpufreq_rwsem)) return 0; -#ifdef CONFIG_HOTPLUG_CPU /* Check if this cpu was hot-unplugged earlier and has siblings */ read_lock_irqsave(&cpufreq_driver_lock, flags); list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) { @@ -1144,7 +1136,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) } } read_unlock_irqrestore(&cpufreq_driver_lock, flags); -#endif /* * Restore the saved policy when doing light-weight init and fall back From 7f0c020ab6ecb513394de9b02df33a5cfc62084f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:32 +0530 Subject: [PATCH 15/46] cpufreq: get rid of 'tpolicy' from __cpufreq_add_dev() There is no need of this separate variable, use 'policy' instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1e72621ca157..127cc6f18ca5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1105,7 +1105,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { unsigned int j, cpu = dev->id; int ret = -ENOMEM; - struct cpufreq_policy *policy, *tpolicy; + struct cpufreq_policy *policy; unsigned long flags; bool recover_policy = cpufreq_suspended; @@ -1127,10 +1127,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Check if this cpu was hot-unplugged earlier and has siblings */ read_lock_irqsave(&cpufreq_driver_lock, flags); - list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) { - if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) { + list_for_each_entry(policy, &cpufreq_policy_list, policy_list) { + if (cpumask_test_cpu(cpu, policy->related_cpus)) { read_unlock_irqrestore(&cpufreq_driver_lock, flags); - ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev); + ret = cpufreq_add_policy_cpu(policy, cpu, dev); up_read(&cpufreq_rwsem); return ret; } From d7a9771c1a37af8d0cd0cf97723aeea88b9de8bb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:33 +0530 Subject: [PATCH 16/46] cpufreq: use light-weight cpufreq_cpu_get_raw() in __cpufreq_add_dev() We just need to check if a 'policy' is already present for the cpu we are adding. We don't need to take all the locks and do kobject usage updates. Use the light-weight cpufreq_cpu_get_raw() routine instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 127cc6f18ca5..3a415130f6e1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1116,11 +1116,9 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* check whether a different CPU already registered this * CPU because it is in the same boat. */ - policy = cpufreq_cpu_get(cpu); - if (unlikely(policy)) { - cpufreq_cpu_put(policy); + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(policy)) return 0; - } if (!down_read_trylock(&cpufreq_rwsem)) return 0; From 39c132eebdd26a3041a92fc64fc1e61293eb39da Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:34 +0530 Subject: [PATCH 17/46] cpufreq: limit the scope of l_p_j variables These variables are just used within adjust_jiffies() and so must be local to it. Also there is no need of a dummy routine for CONFIG_SMP case as we can take care of all that with help of macros in the same routine. It doesn't look that ugly. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3a415130f6e1..96abdcd99424 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -250,12 +250,12 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_put); * systems as each CPU might be scaled differently. So, use the arch * per-CPU loops_per_jiffy value wherever possible. */ -#ifndef CONFIG_SMP -static unsigned long l_p_j_ref; -static unsigned int l_p_j_ref_freq; - static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) { +#ifndef CONFIG_SMP + static unsigned long l_p_j_ref; + static unsigned int l_p_j_ref_freq; + if (ci->flags & CPUFREQ_CONST_LOOPS) return; @@ -271,13 +271,8 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) pr_debug("scaling loops_per_jiffy to %lu for frequency %u kHz\n", loops_per_jiffy, ci->new); } -} -#else -static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) -{ - return; -} #endif +} static void __cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) From ce1bcfe94db895cbd6876e176af5824742b29e25 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:35 +0530 Subject: [PATCH 18/46] cpufreq: check cpufreq_policy_list instead of scanning policies for all CPUs CPUFREQ_STICKY flag is set by drivers which don't want to get unregistered even if cpufreq-core isn't able to initialize policy for any CPU. When this flag isn't set, we try to unregister the driver. To find out which CPUs are registered and which are not, we try to check per_cpu cpufreq_cpu_data for all CPUs. Because we have a list of valid policies available now, we better check if the list is empty or not instead of the 'for' loop. That will be much more efficient. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 96abdcd99424..09f333f4df88 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2455,23 +2455,12 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (ret) goto err_boost_unreg; - if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) { - int i; - ret = -ENODEV; - - /* check for at least one working CPU */ - for (i = 0; i < nr_cpu_ids; i++) - if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) { - ret = 0; - break; - } - + if (!(cpufreq_driver->flags & CPUFREQ_STICKY) && + list_empty(&cpufreq_policy_list)) { /* if all ->init() calls failed, unregister */ - if (ret) { - pr_debug("no CPU initialized for driver %s\n", - driver_data->name); - goto err_if_unreg; - } + pr_debug("%s: No CPU initialized for driver %s\n", __func__, + driver_data->name); + goto err_if_unreg; } register_hotcpu_notifier(&cpufreq_cpu_notifier); From 818c57126e101abf87741131298354258396480c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 2 Jan 2015 12:34:38 +0530 Subject: [PATCH 19/46] cpufreq: move some initialization stuff to cpufreq_policy_alloc() We need to initialize completion and work only on policy allocation and not really on the policy restore side and so we better move this piece of code to cpufreq_policy_alloc(). Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 09f333f4df88..70b568a253e9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1026,6 +1026,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(void) init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); init_waitqueue_head(&policy->transition_wait); + init_completion(&policy->kobj_unregister); + INIT_WORK(&policy->update, handle_update); return policy; @@ -1155,9 +1157,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) cpumask_copy(policy->cpus, cpumask_of(cpu)); - init_completion(&policy->kobj_unregister); - INIT_WORK(&policy->update, handle_update); - /* call driver. From then on the cpufreq must be able * to accept all calls to ->verify and ->setpolicy for this CPU */ From 00d0b29472d1605851c12c77aaa3664413a71fbf Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:00 +0530 Subject: [PATCH 20/46] cpufreq: stats: Improve module description string The MODULE_DESCRIPTION() string is just too long and then is broken into multiple lines just to make checkpatch happy. Rewrite it to make it more precise. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 0cd9b4dcef99..80801f880dd8 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -374,8 +374,7 @@ static void __exit cpufreq_stats_exit(void) } MODULE_AUTHOR("Zou Nan hai "); -MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats " - "through sysfs filesystem"); +MODULE_DESCRIPTION("Export cpufreq stats via sysfs"); MODULE_LICENSE("GPL"); module_init(cpufreq_stats_init); From b8c674482f3cd0d93d07dbc2518cc96c3cc24ed0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:01 +0530 Subject: [PATCH 21/46] cpufreq: stats: return -EEXIST when stats are already allocated __cpufreq_stats_create_table() is called from: - cpufreq notifier on creation of a new policy. Stats will always be NULL here. - cpufreq_stats_init() for all CPUs as cpufreq-stats might have been initialized after cpufreq driver. For any policy, 'stats' will be NULL for the first CPU only and will be valid for all other CPUs managed by the same policy. While we return for other CPUs, we don't return the right error value. It's not that we would fail with -EBUSY. But generally, this is what these return values mean: - EBUSY: we are busy right now, try again. And the retry attempt might be immediate. - EEXIST: We already have what you are trying to create and there is no need to create it again, and so no more tries are required. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 80801f880dd8..d2299ca2fc2c 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -192,8 +192,10 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) if (unlikely(!table)) return 0; + /* stats already initialized */ if (per_cpu(cpufreq_stats_table, cpu)) - return -EBUSY; + return -EEXIST; + stat = kzalloc(sizeof(*stat), GFP_KERNEL); if ((stat) == NULL) return -ENOMEM; From 43b9cdaf5c22230269b8edd99ec78380a6ac0715 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:02 +0530 Subject: [PATCH 22/46] cpufreq: stats: remove unused cpufreq_stats_attribute It was never used, but is there since the first commit. Remove it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index d2299ca2fc2c..14f8d858a63d 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -33,11 +33,6 @@ struct cpufreq_stats { static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table); -struct cpufreq_stats_attribute { - struct attribute attr; - ssize_t(*show) (struct cpufreq_stats *, char *); -}; - static int cpufreq_stats_update(unsigned int cpu) { struct cpufreq_stats *stat; From 9531347c61f689ac6de899a03ce91be4157277f7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:03 +0530 Subject: [PATCH 23/46] cpufreq: stats: initialize 'cur_time' on its definition 'cur_time' is defined in the first line and is then assigned a value in the next line. Initialize it while defining it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 14f8d858a63d..21bec770569d 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -36,9 +36,8 @@ static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table); static int cpufreq_stats_update(unsigned int cpu) { struct cpufreq_stats *stat; - unsigned long long cur_time; + unsigned long long cur_time = get_jiffies_64(); - cur_time = get_jiffies_64(); spin_lock(&cpufreq_stats_lock); stat = per_cpu(cpufreq_stats_table, cpu); if (stat->time_in_state) From f93dbbbd108936b18b3230af36d9a36866ce69a7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:04 +0530 Subject: [PATCH 24/46] cpufreq: stats: don't check for freq table while freeing stats While we allocate stats, we do need to check if freq-table is present or not as we need to use it then. But while freeing stats, all we need to know is if stats holds a valid pointer value. There is no use of testing if cpufreq table is present or not. Don't check it. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 21bec770569d..e9d68420b876 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -168,8 +168,7 @@ static void cpufreq_stats_free_table(unsigned int cpu) if (!policy) return; - if (cpufreq_frequency_get_table(policy->cpu)) - __cpufreq_stats_free_table(policy); + __cpufreq_stats_free_table(policy); cpufreq_cpu_put(policy); } From 2aba0c1bae564a59fc38c407abf7985f9e347cc4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:05 +0530 Subject: [PATCH 25/46] cpufreq: stats: pass 'stat' to cpufreq_stats_update() It is better to pass a struct cpufreq_stats pointer to cpufreq_stats_update() instead of a CPU number, because that's all it needs. Even if we pass a cpu number to cpufreq_stats_update(), it reads the per-cpu variable to get 'stats' out of it. So we are doing these operations unnecessarily: - First getting the cpu number to pass to cpufreq_stats_update(), stat->cpu. - And then getting stats from the cpu, per_cpu(cpufreq_stats_table, cpu). Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index e9d68420b876..6c234f548601 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -33,13 +33,11 @@ struct cpufreq_stats { static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table); -static int cpufreq_stats_update(unsigned int cpu) +static int cpufreq_stats_update(struct cpufreq_stats *stat) { - struct cpufreq_stats *stat; unsigned long long cur_time = get_jiffies_64(); spin_lock(&cpufreq_stats_lock); - stat = per_cpu(cpufreq_stats_table, cpu); if (stat->time_in_state) stat->time_in_state[stat->last_index] += cur_time - stat->last_time; @@ -64,7 +62,7 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); if (!stat) return 0; - cpufreq_stats_update(stat->cpu); + cpufreq_stats_update(stat); for (i = 0; i < stat->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i], (unsigned long long) @@ -82,7 +80,7 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); if (!stat) return 0; - cpufreq_stats_update(stat->cpu); + cpufreq_stats_update(stat); len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i < stat->state_num; i++) { @@ -307,7 +305,7 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, if (old_index == -1 || new_index == -1) return 0; - cpufreq_stats_update(freq->cpu); + cpufreq_stats_update(stat); if (old_index == new_index) return 0; From a9aaf2915ee265735c28b764551d084e61a694e0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 13 Jan 2015 11:34:00 +0530 Subject: [PATCH 26/46] cpufreq: stats: get rid of per-cpu cpufreq_stats_table All CPUs sharing a cpufreq policy share stats too. For this reason, add a stats pointer to struct cpufreq_policy and drop per-CPU variable cpufreq_stats_table used for accessing cpufreq stats so as to reduce code complexity. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 62 +++++++++++++++------------------ include/linux/cpufreq.h | 3 ++ 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 6c234f548601..3792b2e2f4a8 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -31,8 +31,6 @@ struct cpufreq_stats { #endif }; -static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table); - static int cpufreq_stats_update(struct cpufreq_stats *stat) { unsigned long long cur_time = get_jiffies_64(); @@ -48,20 +46,15 @@ static int cpufreq_stats_update(struct cpufreq_stats *stat) static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) { - struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); - if (!stat) - return 0; - return sprintf(buf, "%d\n", - per_cpu(cpufreq_stats_table, stat->cpu)->total_trans); + return sprintf(buf, "%d\n", policy->stats->total_trans); } static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) { + struct cpufreq_stats *stat = policy->stats; ssize_t len = 0; int i; - struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); - if (!stat) - return 0; + cpufreq_stats_update(stat); for (i = 0; i < stat->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i], @@ -74,12 +67,10 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) #ifdef CONFIG_CPU_FREQ_STAT_DETAILS static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) { + struct cpufreq_stats *stat = policy->stats; ssize_t len = 0; int i, j; - struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); - if (!stat) - return 0; cpufreq_stats_update(stat); len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); @@ -145,8 +136,9 @@ static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq) static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) { - struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); + struct cpufreq_stats *stat = policy->stats; + /* Already freed */ if (!stat) return; @@ -155,7 +147,7 @@ static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) sysfs_remove_group(&policy->kobj, &stats_attr_group); kfree(stat->time_in_state); kfree(stat); - per_cpu(cpufreq_stats_table, policy->cpu) = NULL; + policy->stats = NULL; } static void cpufreq_stats_free_table(unsigned int cpu) @@ -184,7 +176,7 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) return 0; /* stats already initialized */ - if (per_cpu(cpufreq_stats_table, cpu)) + if (policy->stats) return -EEXIST; stat = kzalloc(sizeof(*stat), GFP_KERNEL); @@ -196,7 +188,7 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) goto error_out; stat->cpu = cpu; - per_cpu(cpufreq_stats_table, cpu) = stat; + policy->stats = stat; cpufreq_for_each_valid_entry(pos, table) count++; @@ -231,7 +223,7 @@ error_alloc: sysfs_remove_group(&policy->kobj, &stats_attr_group); error_out: kfree(stat); - per_cpu(cpufreq_stats_table, cpu) = NULL; + policy->stats = NULL; return ret; } @@ -254,15 +246,7 @@ static void cpufreq_stats_create_table(unsigned int cpu) static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy) { - struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, - policy->last_cpu); - - pr_debug("Updating stats_table for new_cpu %u from last_cpu %u\n", - policy->cpu, policy->last_cpu); - per_cpu(cpufreq_stats_table, policy->cpu) = per_cpu(cpufreq_stats_table, - policy->last_cpu); - per_cpu(cpufreq_stats_table, policy->last_cpu) = NULL; - stat->cpu = policy->cpu; + policy->stats->cpu = policy->cpu; } static int cpufreq_stat_notifier_policy(struct notifier_block *nb, @@ -288,27 +272,36 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; + struct cpufreq_policy *policy = cpufreq_cpu_get(freq->cpu); struct cpufreq_stats *stat; int old_index, new_index; - if (val != CPUFREQ_POSTCHANGE) + if (!policy) { + pr_err("%s: No policy found\n", __func__); return 0; + } - stat = per_cpu(cpufreq_stats_table, freq->cpu); - if (!stat) - return 0; + if (val != CPUFREQ_POSTCHANGE) + goto put_policy; + + if (!policy->stats) { + pr_debug("%s: No stats found\n", __func__); + goto put_policy; + } + + stat = policy->stats; old_index = stat->last_index; new_index = freq_table_get_index(stat, freq->new); /* We can't do stat->time_in_state[-1]= .. */ if (old_index == -1 || new_index == -1) - return 0; + goto put_policy; cpufreq_stats_update(stat); if (old_index == new_index) - return 0; + goto put_policy; spin_lock(&cpufreq_stats_lock); stat->last_index = new_index; @@ -317,6 +310,9 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, #endif stat->total_trans++; spin_unlock(&cpufreq_stats_lock); + +put_policy: + cpufreq_cpu_put(policy); return 0; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4d078cebafd2..60b7b496565d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -113,6 +113,9 @@ struct cpufreq_policy { wait_queue_head_t transition_wait; struct task_struct *transition_task; /* Task which is doing the transition */ + /* cpufreq-stats */ + struct cpufreq_stats *stats; + /* For cpufreq driver's internal use */ void *driver_data; }; From 5094160786ede5c08a6901648787a5bf9b0dc2e1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:07 +0530 Subject: [PATCH 27/46] cpufreq: stats: rename 'struct cpufreq_stats' objects as 'stats' Currently we name objects of 'struct cpufreq_stats' as 'stat' and 'stats'. Use 'stats' to make it consistent. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 102 ++++++++++++++++---------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 3792b2e2f4a8..cdc5233cf0c4 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -31,15 +31,15 @@ struct cpufreq_stats { #endif }; -static int cpufreq_stats_update(struct cpufreq_stats *stat) +static int cpufreq_stats_update(struct cpufreq_stats *stats) { unsigned long long cur_time = get_jiffies_64(); spin_lock(&cpufreq_stats_lock); - if (stat->time_in_state) - stat->time_in_state[stat->last_index] += - cur_time - stat->last_time; - stat->last_time = cur_time; + if (stats->time_in_state) + stats->time_in_state[stats->last_index] += + cur_time - stats->last_time; + stats->last_time = cur_time; spin_unlock(&cpufreq_stats_lock); return 0; } @@ -51,15 +51,15 @@ static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) { - struct cpufreq_stats *stat = policy->stats; + struct cpufreq_stats *stats = policy->stats; ssize_t len = 0; int i; - cpufreq_stats_update(stat); - for (i = 0; i < stat->state_num; i++) { - len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i], + cpufreq_stats_update(stats); + for (i = 0; i < stats->state_num; i++) { + len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], (unsigned long long) - jiffies_64_to_clock_t(stat->time_in_state[i])); + jiffies_64_to_clock_t(stats->time_in_state[i])); } return len; } @@ -67,36 +67,36 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) #ifdef CONFIG_CPU_FREQ_STAT_DETAILS static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) { - struct cpufreq_stats *stat = policy->stats; + struct cpufreq_stats *stats = policy->stats; ssize_t len = 0; int i, j; - cpufreq_stats_update(stat); + cpufreq_stats_update(stats); len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); - for (i = 0; i < stat->state_num; i++) { + for (i = 0; i < stats->state_num; i++) { if (len >= PAGE_SIZE) break; len += snprintf(buf + len, PAGE_SIZE - len, "%9u ", - stat->freq_table[i]); + stats->freq_table[i]); } if (len >= PAGE_SIZE) return PAGE_SIZE; len += snprintf(buf + len, PAGE_SIZE - len, "\n"); - for (i = 0; i < stat->state_num; i++) { + for (i = 0; i < stats->state_num; i++) { if (len >= PAGE_SIZE) break; len += snprintf(buf + len, PAGE_SIZE - len, "%9u: ", - stat->freq_table[i]); + stats->freq_table[i]); - for (j = 0; j < stat->state_num; j++) { + for (j = 0; j < stats->state_num; j++) { if (len >= PAGE_SIZE) break; len += snprintf(buf + len, PAGE_SIZE - len, "%9u ", - stat->trans_table[i*stat->max_state+j]); + stats->trans_table[i*stats->max_state+j]); } if (len >= PAGE_SIZE) break; @@ -125,28 +125,28 @@ static struct attribute_group stats_attr_group = { .name = "stats" }; -static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq) +static int freq_table_get_index(struct cpufreq_stats *stats, unsigned int freq) { int index; - for (index = 0; index < stat->max_state; index++) - if (stat->freq_table[index] == freq) + for (index = 0; index < stats->max_state; index++) + if (stats->freq_table[index] == freq) return index; return -1; } static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) { - struct cpufreq_stats *stat = policy->stats; + struct cpufreq_stats *stats = policy->stats; /* Already freed */ - if (!stat) + if (!stats) return; - pr_debug("%s: Free stat table\n", __func__); + pr_debug("%s: Free stats table\n", __func__); sysfs_remove_group(&policy->kobj, &stats_attr_group); - kfree(stat->time_in_state); - kfree(stat); + kfree(stats->time_in_state); + kfree(stats); policy->stats = NULL; } @@ -166,7 +166,7 @@ static void cpufreq_stats_free_table(unsigned int cpu) static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) { unsigned int i, count = 0, ret = 0; - struct cpufreq_stats *stat; + struct cpufreq_stats *stats; unsigned int alloc_size; unsigned int cpu = policy->cpu; struct cpufreq_frequency_table *pos, *table; @@ -179,16 +179,16 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) if (policy->stats) return -EEXIST; - stat = kzalloc(sizeof(*stat), GFP_KERNEL); - if ((stat) == NULL) + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if ((stats) == NULL) return -ENOMEM; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); if (ret) goto error_out; - stat->cpu = cpu; - policy->stats = stat; + stats->cpu = cpu; + policy->stats = stats; cpufreq_for_each_valid_entry(pos, table) count++; @@ -198,31 +198,31 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) #ifdef CONFIG_CPU_FREQ_STAT_DETAILS alloc_size += count * count * sizeof(int); #endif - stat->max_state = count; - stat->time_in_state = kzalloc(alloc_size, GFP_KERNEL); - if (!stat->time_in_state) { + stats->max_state = count; + stats->time_in_state = kzalloc(alloc_size, GFP_KERNEL); + if (!stats->time_in_state) { ret = -ENOMEM; goto error_alloc; } - stat->freq_table = (unsigned int *)(stat->time_in_state + count); + stats->freq_table = (unsigned int *)(stats->time_in_state + count); #ifdef CONFIG_CPU_FREQ_STAT_DETAILS - stat->trans_table = stat->freq_table + count; + stats->trans_table = stats->freq_table + count; #endif i = 0; cpufreq_for_each_valid_entry(pos, table) - if (freq_table_get_index(stat, pos->frequency) == -1) - stat->freq_table[i++] = pos->frequency; - stat->state_num = i; + if (freq_table_get_index(stats, pos->frequency) == -1) + stats->freq_table[i++] = pos->frequency; + stats->state_num = i; spin_lock(&cpufreq_stats_lock); - stat->last_time = get_jiffies_64(); - stat->last_index = freq_table_get_index(stat, policy->cur); + stats->last_time = get_jiffies_64(); + stats->last_index = freq_table_get_index(stats, policy->cur); spin_unlock(&cpufreq_stats_lock); return 0; error_alloc: sysfs_remove_group(&policy->kobj, &stats_attr_group); error_out: - kfree(stat); + kfree(stats); policy->stats = NULL; return ret; } @@ -273,7 +273,7 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, { struct cpufreq_freqs *freq = data; struct cpufreq_policy *policy = cpufreq_cpu_get(freq->cpu); - struct cpufreq_stats *stat; + struct cpufreq_stats *stats; int old_index, new_index; if (!policy) { @@ -289,26 +289,26 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, goto put_policy; } - stat = policy->stats; + stats = policy->stats; - old_index = stat->last_index; - new_index = freq_table_get_index(stat, freq->new); + old_index = stats->last_index; + new_index = freq_table_get_index(stats, freq->new); - /* We can't do stat->time_in_state[-1]= .. */ + /* We can't do stats->time_in_state[-1]= .. */ if (old_index == -1 || new_index == -1) goto put_policy; - cpufreq_stats_update(stat); + cpufreq_stats_update(stats); if (old_index == new_index) goto put_policy; spin_lock(&cpufreq_stats_lock); - stat->last_index = new_index; + stats->last_index = new_index; #ifdef CONFIG_CPU_FREQ_STAT_DETAILS - stat->trans_table[old_index * stat->max_state + new_index]++; + stats->trans_table[old_index * stats->max_state + new_index]++; #endif - stat->total_trans++; + stats->total_trans++; spin_unlock(&cpufreq_stats_lock); put_policy: From 7c418ff099110d987846c8c670479a3b90ed1dcb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:08 +0530 Subject: [PATCH 28/46] cpufreq: Remove (now) unused 'last_cpu' from struct cpufreq_policy 'last_cpu' was used only from cpufreq-stats and isn't used anymore. Get rid of it. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 --- include/linux/cpufreq.h | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 70b568a253e9..60ef37d569c5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1086,10 +1086,7 @@ static int update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, } down_write(&policy->rwsem); - - policy->last_cpu = policy->cpu; policy->cpu = cpu; - up_write(&policy->rwsem); blocking_notifier_call_chain(&cpufreq_policy_notifier_list, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 60b7b496565d..7e1a389b4e92 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -66,8 +66,6 @@ struct cpufreq_policy { unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of CPU managing this policy */ - unsigned int last_cpu; /* cpu nr of previous CPU that managed - * this policy */ struct clk *clk; struct cpufreq_cpuinfo cpuinfo;/* see above */ From c92f2125ac5338151bb5c45e371c701e107e3197 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:09 +0530 Subject: [PATCH 29/46] cpufreq: stats: drop 'cpu' field of struct cpufreq_stats 'cpu' field of struct cpufreq_stats isn't used anymore and so can be dropped. This change makes cpufreq_stats_update_policy_cpu() empty and so that is removed as well. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index cdc5233cf0c4..e1fe0a945639 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -18,7 +18,6 @@ static spinlock_t cpufreq_stats_lock; struct cpufreq_stats { - unsigned int cpu; unsigned int total_trans; unsigned long long last_time; unsigned int max_state; @@ -187,7 +186,6 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) if (ret) goto error_out; - stats->cpu = cpu; policy->stats = stats; cpufreq_for_each_valid_entry(pos, table) @@ -244,22 +242,12 @@ static void cpufreq_stats_create_table(unsigned int cpu) cpufreq_cpu_put(policy); } -static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy) -{ - policy->stats->cpu = policy->cpu; -} - static int cpufreq_stat_notifier_policy(struct notifier_block *nb, unsigned long val, void *data) { int ret = 0; struct cpufreq_policy *policy = data; - if (val == CPUFREQ_UPDATE_POLICY_CPU) { - cpufreq_stats_update_policy_cpu(policy); - return 0; - } - if (val == CPUFREQ_CREATE_POLICY) ret = __cpufreq_stats_create_table(policy); else if (val == CPUFREQ_REMOVE_POLICY) From d9f354460db8b58a8395936d323b4ca6e8428b9d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:10 +0530 Subject: [PATCH 30/46] cpufreq: remove CPUFREQ_UPDATE_POLICY_CPU notifications CPUFREQ_UPDATE_POLICY_CPU notifications were used only from cpufreq-stats which doesn't use it anymore. Remove them. This also decrements values of other notification macros defined after CPUFREQ_UPDATE_POLICY_CPU by 1 to remove gaps. Hopefully all users are using macro's instead of direct numbers and so they wouldn't break as macro values are changed now. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 --- include/linux/cpufreq.h | 5 ++--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 60ef37d569c5..ca69f42b8e1e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1089,9 +1089,6 @@ static int update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, policy->cpu = cpu; up_write(&policy->rwsem); - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_UPDATE_POLICY_CPU, policy); - return 0; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7e1a389b4e92..2ee4888c1f47 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -368,9 +368,8 @@ static inline void cpufreq_resume(void) {} #define CPUFREQ_INCOMPATIBLE (1) #define CPUFREQ_NOTIFY (2) #define CPUFREQ_START (3) -#define CPUFREQ_UPDATE_POLICY_CPU (4) -#define CPUFREQ_CREATE_POLICY (5) -#define CPUFREQ_REMOVE_POLICY (6) +#define CPUFREQ_CREATE_POLICY (4) +#define CPUFREQ_REMOVE_POLICY (5) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); From a685c6d023c77303ca8700f37bf06eb85ffbf06a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:11 +0530 Subject: [PATCH 31/46] cpufreq: stats: create sysfs group once we are ready Userspace is free to read value of any file from cpufreq/stats directory once they are created. __cpufreq_stats_create_table() is creating the sysfs files first and then allocating resources for them. Though it would be quite difficult to trigger the racy situation here, but for the sake of keeping sensible code lets create sysfs entries only after we are ready to go. This also does some makeup to the routine to make it look better. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 44 +++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index e1fe0a945639..5d7bf9b1dfc5 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -164,12 +164,13 @@ static void cpufreq_stats_free_table(unsigned int cpu) static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) { - unsigned int i, count = 0, ret = 0; + unsigned int i = 0, count = 0, ret = -ENOMEM; struct cpufreq_stats *stats; unsigned int alloc_size; unsigned int cpu = policy->cpu; struct cpufreq_frequency_table *pos, *table; + /* We need cpufreq table for creating stats table */ table = cpufreq_frequency_get_table(cpu); if (unlikely(!table)) return 0; @@ -179,15 +180,10 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) return -EEXIST; stats = kzalloc(sizeof(*stats), GFP_KERNEL); - if ((stats) == NULL) + if (!stats) return -ENOMEM; - ret = sysfs_create_group(&policy->kobj, &stats_attr_group); - if (ret) - goto error_out; - - policy->stats = stats; - + /* Find total allocation size */ cpufreq_for_each_valid_entry(pos, table) count++; @@ -196,32 +192,42 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) #ifdef CONFIG_CPU_FREQ_STAT_DETAILS alloc_size += count * count * sizeof(int); #endif - stats->max_state = count; + + /* Allocate memory for time_in_state/freq_table/trans_table in one go */ stats->time_in_state = kzalloc(alloc_size, GFP_KERNEL); - if (!stats->time_in_state) { - ret = -ENOMEM; - goto error_alloc; - } + if (!stats->time_in_state) + goto free_stat; + stats->freq_table = (unsigned int *)(stats->time_in_state + count); #ifdef CONFIG_CPU_FREQ_STAT_DETAILS stats->trans_table = stats->freq_table + count; #endif - i = 0; + + stats->max_state = count; + + /* Find valid-unique entries */ cpufreq_for_each_valid_entry(pos, table) if (freq_table_get_index(stats, pos->frequency) == -1) stats->freq_table[i++] = pos->frequency; stats->state_num = i; + spin_lock(&cpufreq_stats_lock); stats->last_time = get_jiffies_64(); stats->last_index = freq_table_get_index(stats, policy->cur); spin_unlock(&cpufreq_stats_lock); - return 0; -error_alloc: - sysfs_remove_group(&policy->kobj, &stats_attr_group); -error_out: - kfree(stats); + + policy->stats = stats; + ret = sysfs_create_group(&policy->kobj, &stats_attr_group); + if (!ret) + return 0; + + /* We failed, release resources */ policy->stats = NULL; + kfree(stats->time_in_state); +free_stat: + kfree(stats); + return ret; } From c960f9b22d63934401949fb20d10ec4e69b535b2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:12 +0530 Subject: [PATCH 32/46] cpufreq: stats: time_in_state can't be NULL in cpufreq_stats_update() 'time_in_state' can't be NULL if 'stats' is valid. These are allocated together and only if time_in_state is allocated successfully, we update policy->stats. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5d7bf9b1dfc5..a9a6dc479adf 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -35,9 +35,7 @@ static int cpufreq_stats_update(struct cpufreq_stats *stats) unsigned long long cur_time = get_jiffies_64(); spin_lock(&cpufreq_stats_lock); - if (stats->time_in_state) - stats->time_in_state[stats->last_index] += - cur_time - stats->last_time; + stats->time_in_state[stats->last_index] += cur_time - stats->last_time; stats->last_time = cur_time; spin_unlock(&cpufreq_stats_lock); return 0; From 9225913d386537f84f0eb72f022a8b4b9e684846 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:13 +0530 Subject: [PATCH 33/46] cpufreq: stats: don't update stats from show_trans_table() cpufreq_stats_update() updates time_in_state and nothing else. It should ideally be updated only in two cases: - User requested for the current value of time_in_state. - We have switched states and so need to update time for the last state. Currently, we are also doing this while user asks for the transition table of frequencies. It wouldn't do any harm, but no good as well. Its useless here. Remove it. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index a9a6dc479adf..4cd261156fff 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -68,7 +68,6 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j; - cpufreq_stats_update(stats); len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i < stats->state_num; i++) { From e73476949c6d1e2edd04bbf7b5bae30afa370e33 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:14 +0530 Subject: [PATCH 34/46] cpufreq: stats: don't update stats on false notifiers We need to call cpufreq_stats_update() to update 'time_in_state' for the last frequency. This is achieved by calling it from cpufreq_stat_notifier_trans(), which is called after frequency transition. But if we detect that the cpu's frequency haven't really changed and its a false POSTCHANGE notification, we don't really need to update time_in_state. It wouldn't cause any harm in calling cpufreq_stats_update() but we can avoid calling it here and call it when the frequency really changes. The result will be the same but more efficient. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 4cd261156fff..c948086a332b 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -289,11 +289,11 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, if (old_index == -1 || new_index == -1) goto put_policy; - cpufreq_stats_update(stats); - if (old_index == new_index) goto put_policy; + cpufreq_stats_update(stats); + spin_lock(&cpufreq_stats_lock); stats->last_index = new_index; #ifdef CONFIG_CPU_FREQ_STAT_DETAILS From 490285c65e2398a533035dd9d75cb606a61ea39a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:15 +0530 Subject: [PATCH 35/46] cpufreq: stats: drop unnecessary locking There is no possibility of any race on updating last_index, trans_table or total_trans as these are updated only by cpufreq_stat_notifier_trans() which will be called sequentially. The only place where locking is still relevant is: cpufreq_stats_update(), which updates time_in_state and last_time. This can be called by two thread in parallel, that may result in races. The two threads being: - sysfs read of time_in_state - and frequency transition that calls cpufreq_stat_notifier_trans(). Remove locking from the first case mentioned above. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index c948086a332b..5e370a30a964 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -207,12 +207,10 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) cpufreq_for_each_valid_entry(pos, table) if (freq_table_get_index(stats, pos->frequency) == -1) stats->freq_table[i++] = pos->frequency; - stats->state_num = i; - spin_lock(&cpufreq_stats_lock); + stats->state_num = i; stats->last_time = get_jiffies_64(); stats->last_index = freq_table_get_index(stats, policy->cur); - spin_unlock(&cpufreq_stats_lock); policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); @@ -294,13 +292,11 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, cpufreq_stats_update(stats); - spin_lock(&cpufreq_stats_lock); stats->last_index = new_index; #ifdef CONFIG_CPU_FREQ_STAT_DETAILS stats->trans_table[old_index * stats->max_state + new_index]++; #endif stats->total_trans++; - spin_unlock(&cpufreq_stats_lock); put_policy: cpufreq_cpu_put(policy); From 7ab0256e57ae4423fbfb6b6c1611578c634702c9 Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Wed, 28 Jan 2015 13:53:28 -0800 Subject: [PATCH 36/46] intel_pstate: Add support for SkyLake Add SKL cpuid. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 742eefba12c2..7d9f822f2031 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -825,6 +825,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x46, core_params), ICPU(0x47, core_params), ICPU(0x4c, byt_params), + ICPU(0x4e, core_params), ICPU(0x4f, core_params), ICPU(0x56, core_params), {} From d01b1f48c5fd95901203bd830458eaf619ed6c47 Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Wed, 28 Jan 2015 15:03:27 -0800 Subject: [PATCH 37/46] intel_pstate: expose turbo range to sysfs This patch adds "turbo_pct" to the intel_pstate sysfs interface. turbo_pct will display the percentage of the total supported pstates that are in the turbo range. This value is independent of whether turbo has been disabled or not. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/intel-pstate.txt | 4 ++++ drivers/cpufreq/intel_pstate.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index 765d7fc0e692..7767ce6756be 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -37,6 +37,10 @@ controlling P state selection. These files have been added to no_turbo: limits the driver to selecting P states below the turbo frequency range. + turbo_pct: displays the percentage of the total performance that + is supported by hardware that is in the turbo range. This number + is independent of whether turbo has been disabled or not. + For contemporary Intel processors, the frequency is controlled by the processor itself and the P-states exposed to software are related to performance levels. The idea that frequency can be set to a single diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7d9f822f2031..ed6dd7dac094 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -338,6 +338,22 @@ static void __init intel_pstate_debug_expose_params(void) return sprintf(buf, "%u\n", limits.object); \ } +static ssize_t show_turbo_pct(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpudata *cpu; + int total, no_turbo, turbo_pct; + uint32_t turbo_fp; + + cpu = all_cpu_data[0]; + + total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; + no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1; + turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total)); + turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100))); + return sprintf(buf, "%u\n", turbo_pct); +} + static ssize_t show_no_turbo(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -418,11 +434,13 @@ show_one(min_perf_pct, min_perf_pct); define_one_global_rw(no_turbo); define_one_global_rw(max_perf_pct); define_one_global_rw(min_perf_pct); +define_one_global_ro(turbo_pct); static struct attribute *intel_pstate_attributes[] = { &no_turbo.attr, &max_perf_pct.attr, &min_perf_pct.attr, + &turbo_pct.attr, NULL }; From 0522424ecb333c0874c4e74bc053dd662bed40df Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Wed, 28 Jan 2015 15:03:28 -0800 Subject: [PATCH 38/46] intel_pstate: Add num_pstates to sysfs Add a sysfs interface to display the total number of supported pstates. This value is independent of whether turbo has been enabled or disabled. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/intel-pstate.txt | 4 ++++ drivers/cpufreq/intel_pstate.c | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index 7767ce6756be..655750743fb0 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -41,6 +41,10 @@ controlling P state selection. These files have been added to is supported by hardware that is in the turbo range. This number is independent of whether turbo has been disabled or not. + num_pstates: displays the number of pstates that are supported + by hardware. This number is independent of whether turbo has + been disabled or not. + For contemporary Intel processors, the frequency is controlled by the processor itself and the P-states exposed to software are related to performance levels. The idea that frequency can be set to a single diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ed6dd7dac094..80ecc351d613 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -354,6 +354,17 @@ static ssize_t show_turbo_pct(struct kobject *kobj, return sprintf(buf, "%u\n", turbo_pct); } +static ssize_t show_num_pstates(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpudata *cpu; + int total; + + cpu = all_cpu_data[0]; + total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; + return sprintf(buf, "%u\n", total); +} + static ssize_t show_no_turbo(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -435,12 +446,14 @@ define_one_global_rw(no_turbo); define_one_global_rw(max_perf_pct); define_one_global_rw(min_perf_pct); define_one_global_ro(turbo_pct); +define_one_global_ro(num_pstates); static struct attribute *intel_pstate_attributes[] = { &no_turbo.attr, &max_perf_pct.attr, &min_perf_pct.attr, &turbo_pct.attr, + &num_pstates.attr, NULL }; From 630ec286dd85c5838493ab7ef975881c42069ef0 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 29 Jan 2015 12:17:13 -0800 Subject: [PATCH 39/46] intel_pstate: respect cpufreq policy request When thermal or other subsystem requests to change the policy, use that irrepective of whether cpufreq policy is PERFORMANCE or not. Without this change, when thermal subsystem passive policy wants to reduce performance, it still runs at 100%. Signed-off-by: Srinivas Pandruvada Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 80ecc351d613..dfee5725903b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -919,7 +919,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE && + policy->max >= policy->cpuinfo.max_freq) { limits.min_perf_pct = 100; limits.min_perf = int_tofp(1); limits.max_policy_pct = 100; From a04759924e2528c5cc2af49b183c5638fe8d9bed Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Thu, 29 Jan 2015 13:03:52 -0800 Subject: [PATCH 40/46] intel_pstate: honor user space min_perf_pct override on resume If the user has requested an override of the min_perf_pct via sysfs, then it should be restored whenever policy is updated, such as on resume. Take the max of whatever the user requested and whatever the policy is. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dfee5725903b..e7e808d9a8af 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -148,6 +148,8 @@ struct perf_limits { int32_t min_perf; int max_policy_pct; int max_sysfs_pct; + int min_policy_pct; + int min_sysfs_pct; }; static struct perf_limits limits = { @@ -159,6 +161,8 @@ static struct perf_limits limits = { .min_perf = 0, .max_policy_pct = 100, .max_sysfs_pct = 100, + .min_policy_pct = 0, + .min_sysfs_pct = 0, }; static inline void pid_reset(struct _pid *pid, int setpoint, int busy, @@ -431,7 +435,9 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - limits.min_perf_pct = clamp_t(int, input, 0 , 100); + + limits.min_sysfs_pct = clamp_t(int, input, 0 , 100); + limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); if (hwp_active) @@ -921,6 +927,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (policy->policy == CPUFREQ_POLICY_PERFORMANCE && policy->max >= policy->cpuinfo.max_freq) { + limits.min_policy_pct = 100; limits.min_perf_pct = 100; limits.min_perf = int_tofp(1); limits.max_policy_pct = 100; @@ -930,8 +937,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) return 0; } - limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; - limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); + limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; + limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0 , 100); + limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq; From 6ffae8c06fab058d6c3f8ecb7f921327721034e7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 31 Jan 2015 06:02:44 +0530 Subject: [PATCH 41/46] cpufreq: Set cpufreq_cpu_data to NULL before putting kobject In __cpufreq_remove_dev_finish(), per-cpu 'cpufreq_cpu_data' needs to be cleared before calling kobject_put(&policy->kobj) and under cpufreq_driver_lock. Otherwise, if someone else calls cpufreq_cpu_get() in parallel with it, they can obtain a non-NULL policy from that after kobject_put(&policy->kobj) was executed. Consider this case: Thread A Thread B cpufreq_cpu_get() acquire cpufreq_driver_lock read-per-cpu cpufreq_cpu_data kobject_put(&policy->kobj); kobject_get(&policy->kobj); ... per_cpu(&cpufreq_cpu_data, cpu) = NULL And this will result in a warning like this one: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 4 at include/linux/kref.h:47 kobject_get+0x41/0x50() Modules linked in: acpi_cpufreq(+) nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c sd_mod ixgbe igb mdio ahci hwmon ... Call Trace: [] dump_stack+0x46/0x58 [] warn_slowpath_common+0x81/0xa0 [] warn_slowpath_null+0x1a/0x20 [] kobject_get+0x41/0x50 [] cpufreq_cpu_get+0x75/0xc0 [] cpufreq_update_policy+0x2e/0x1f0 [] ? up+0x32/0x50 [] ? acpi_ns_get_node+0xcb/0xf2 [] ? acpi_evaluate_object+0x22c/0x252 [] ? acpi_get_handle+0x95/0xc0 [] ? acpi_has_method+0x25/0x40 [] acpi_processor_ppc_has_changed+0x77/0x82 [] ? move_linked_works+0x66/0x90 [] acpi_processor_notify+0x58/0xe7 [] acpi_ev_notify_dispatch+0x44/0x5c [] acpi_os_execute_deferred+0x15/0x22 [] process_one_work+0x160/0x410 [] worker_thread+0x11b/0x520 [] ? rescuer_thread+0x380/0x380 [] kthread+0xe1/0x100 [] ? kthread_create_on_node+0x1b0/0x1b0 [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x1b0/0x1b0 ---[ end trace 89e66eb9795efdf7 ]--- The actual code flow is as follows: Thread A: Workqueue: kacpi_notify acpi_processor_notify() acpi_processor_ppc_has_changed() cpufreq_update_policy() cpufreq_cpu_get() kobject_get() Thread B: xenbus_thread() xenbus_thread() msg->u.watch.handle->callback() handle_vcpu_hotplug_event() vcpu_hotplug() cpu_down() __cpu_notify(CPU_POST_DEAD..) cpufreq_cpu_callback() __cpufreq_remove_dev_finish() cpufreq_policy_put_kobj() kobject_put() cpufreq_cpu_get() gets the policy from per-cpu variable cpufreq_cpu_data under cpufreq_driver_lock, and once it gets a valid policy it expects it to not be freed until cpufreq_cpu_put() is called. But the race happens when another thread puts the kobject first and updates cpufreq_cpu_data before or later. And so the first thread gets a valid policy structure and before it does kobject_get() on it, the second one has already done kobject_put(). Fix this by setting cpufreq_cpu_data to NULL before putting the kobject and that too under locks. Reported-by: Ethan Zhao Reported-by: Santosh Shilimkar Signed-off-by: Viresh Kumar Cc: 3.12+ # 3.12+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ca69f42b8e1e..2b181f75da15 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1392,9 +1392,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev, unsigned long flags; struct cpufreq_policy *policy; - read_lock_irqsave(&cpufreq_driver_lock, flags); + write_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data, cpu); - read_unlock_irqrestore(&cpufreq_driver_lock, flags); + per_cpu(cpufreq_cpu_data, cpu) = NULL; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); @@ -1449,7 +1450,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, } } - per_cpu(cpufreq_cpu_data, cpu) = NULL; return 0; } From 1e63eaf0c45f0ac733ef2818cd93e3e9047ef1c5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Jan 2015 14:06:07 +0530 Subject: [PATCH 42/46] cpufreq: Drop cpufreq_disabled() check from cpufreq_cpu_{get|put}() When cpufreq is disabled, the per-cpu variable would have been set to NULL. Remove this unnecessary check. [ Changelog from Saravana Kannan. ] Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2b181f75da15..158709418e21 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -203,7 +203,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) struct cpufreq_policy *policy = NULL; unsigned long flags; - if (cpufreq_disabled() || (cpu >= nr_cpu_ids)) + if (cpu >= nr_cpu_ids) return NULL; if (!down_read_trylock(&cpufreq_rwsem)) @@ -230,9 +230,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get); void cpufreq_cpu_put(struct cpufreq_policy *policy) { - if (cpufreq_disabled()) - return; - kobject_put(&policy->kobj); up_read(&cpufreq_rwsem); } From b4f0676fe20da6abe4577cf960862bed7a31647e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Jan 2015 14:06:08 +0530 Subject: [PATCH 43/46] cpufreq: Create for_each_policy() To make code more readable and less error prone, lets create a helper macro for iterating over all active policies. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 158709418e21..baa238fbadc1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -31,6 +31,12 @@ #include #include +/* Macros to iterate over lists */ +/* Iterate over online CPUs policies */ +static LIST_HEAD(cpufreq_policy_list); +#define for_each_policy(__policy) \ + list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) + /** * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock @@ -41,7 +47,6 @@ static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback); static DEFINE_RWLOCK(cpufreq_driver_lock); DEFINE_MUTEX(cpufreq_governor_lock); -static LIST_HEAD(cpufreq_policy_list); /* This one keeps track of the previously set governor of a removed CPU */ static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor); @@ -1113,7 +1118,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Check if this cpu was hot-unplugged earlier and has siblings */ read_lock_irqsave(&cpufreq_driver_lock, flags); - list_for_each_entry(policy, &cpufreq_policy_list, policy_list) { + for_each_policy(policy) { if (cpumask_test_cpu(cpu, policy->related_cpus)) { read_unlock_irqrestore(&cpufreq_driver_lock, flags); ret = cpufreq_add_policy_cpu(policy, cpu, dev); @@ -1647,7 +1652,7 @@ void cpufreq_suspend(void) pr_debug("%s: Suspending Governors\n", __func__); - list_for_each_entry(policy, &cpufreq_policy_list, policy_list) { + for_each_policy(policy) { if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) pr_err("%s: Failed to stop governor for policy: %p\n", __func__, policy); @@ -1681,7 +1686,7 @@ void cpufreq_resume(void) pr_debug("%s: Resuming Governors\n", __func__); - list_for_each_entry(policy, &cpufreq_policy_list, policy_list) { + for_each_policy(policy) { if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) pr_err("%s: Failed to resume driver: %p\n", __func__, policy); @@ -2324,7 +2329,7 @@ static int cpufreq_boost_set_sw(int state) struct cpufreq_policy *policy; int ret = -EINVAL; - list_for_each_entry(policy, &cpufreq_policy_list, policy_list) { + for_each_policy(policy) { freq_table = cpufreq_frequency_get_table(policy->cpu); if (freq_table) { ret = cpufreq_frequency_table_cpuinfo(policy, From f7b2706117bc4459cd748352cbe4ae69a41c8ed6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Jan 2015 14:06:09 +0530 Subject: [PATCH 44/46] cpufreq: Create for_each_governor() To make code more readable and less error prone, lets create a helper macro for iterating over all available governors. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index baa238fbadc1..28e59a48b35f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -37,6 +37,11 @@ static LIST_HEAD(cpufreq_policy_list); #define for_each_policy(__policy) \ list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) +/* Iterate over governors */ +static LIST_HEAD(cpufreq_governor_list); +#define for_each_governor(__governor) \ + list_for_each_entry(__governor, &cpufreq_governor_list, governor_list) + /** * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock @@ -99,7 +104,6 @@ void disable_cpufreq(void) { off = 1; } -static LIST_HEAD(cpufreq_governor_list); static DEFINE_MUTEX(cpufreq_governor_mutex); bool have_governor_per_policy(void) @@ -434,7 +438,7 @@ static struct cpufreq_governor *find_governor(const char *str_governor) { struct cpufreq_governor *t; - list_for_each_entry(t, &cpufreq_governor_list, governor_list) + for_each_governor(t) if (!strncasecmp(str_governor, t->name, CPUFREQ_NAME_LEN)) return t; @@ -636,7 +640,7 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, goto out; } - list_for_each_entry(t, &cpufreq_governor_list, governor_list) { + for_each_governor(t) { if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) goto out; From 17ad13ba842bd5c197e20c17e107788aa0498ba9 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 3 Feb 2015 19:21:21 +0100 Subject: [PATCH 45/46] cpufreq-dt: Drop unnecessary check before cpufreq_cooling_unregister() invocation The cpufreq_cooling_unregister() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index fde97d6e31d6..bab67db54b7e 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -320,8 +320,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) { struct private_data *priv = policy->driver_data; - if (priv->cdev) - cpufreq_cooling_unregister(priv->cdev); + cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); of_free_opp_table(priv->cpu_dev); clk_put(policy->clk); From d64c3b0bb9195d4de856d841337368d930cdb0ed Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Fri, 6 Feb 2015 13:41:55 -0800 Subject: [PATCH 46/46] intel_pstate: provide option to only use intel_pstate with HWP Allow users the option to disable the driver for any hardware which does not support HWP. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 3 +++ drivers/cpufreq/intel_pstate.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4df73da11adc..07e6701a1439 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1469,6 +1469,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. no_hwp Do not enable hardware P state control (HWP) if available. + hwp_only + Only load intel_pstate on systems which support + hardware P state control (HWP) if available. intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e7e808d9a8af..872c5772c5d3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1019,6 +1019,7 @@ static struct cpufreq_driver intel_pstate_driver = { static int __initdata no_load; static int __initdata no_hwp; +static int __initdata hwp_only; static unsigned int force_load; static int intel_pstate_msrs_not_valid(void) @@ -1216,6 +1217,9 @@ static int __init intel_pstate_init(void) if (cpu_has(c,X86_FEATURE_HWP) && !no_hwp) intel_pstate_hwp_enable(); + if (!hwp_active && hwp_only) + goto out; + rc = cpufreq_register_driver(&intel_pstate_driver); if (rc) goto out; @@ -1250,6 +1254,8 @@ static int __init intel_pstate_setup(char *str) no_hwp = 1; if (!strcmp(str, "force")) force_load = 1; + if (!strcmp(str, "hwp_only")) + hwp_only = 1; return 0; } early_param("intel_pstate", intel_pstate_setup);