From b3a826c05bd96122fd54c297062c495dbcc7511e Mon Sep 17 00:00:00 2001 From: James Geboski Date: Thu, 16 Oct 2014 07:37:11 -0400 Subject: [PATCH 01/21] cpufreq: allow powersave governor as the default without expert mode The intel_pstate driver only supports the performance and the powersave governors. With the performance governor ensuring the highest possible performance settings, userspace tools fail to make any lasting changes. In order to allow userspace tools to make modifications to the settings, the powersave governor must be in use. This makes having the powersave governor as the default convenient for systems where the intel_pstate driver is being employed. Having to enable expert mode in the kernel configuration is just a headache for such a trivial task. This patch applies to all kernel versions 2.6.38 or greater after the migration from CONFIG_EMBEDDED to CONFIG_EXPERT (6a108a14fa35). Most importantly, this applies to kernel versions 3.9 or greater when the intel_pstate driver was introduced. Signed-off-by: James Geboski Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 3489f8f5fada..73df7dbc250a 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -63,7 +63,6 @@ config CPU_FREQ_DEFAULT_GOV_PERFORMANCE config CPU_FREQ_DEFAULT_GOV_POWERSAVE bool "powersave" - depends on EXPERT select CPU_FREQ_GOV_POWERSAVE help Use the CPUFreq governor 'powersave' as default. This sets From a0a22cf14472fa5cd44c8d107eba1a827df9549c Mon Sep 17 00:00:00 2001 From: Kelvin Cheung Date: Fri, 17 Oct 2014 18:23:31 +0800 Subject: [PATCH 02/21] cpufreq: Loongson1: Add cpufreq driver for Loongson1B This patch adds cpufreq driver for Loongson1B which is capable of changing the CPU frequency dynamically. Signed-off-by: Kelvin Cheung Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 10 ++ drivers/cpufreq/Makefile | 1 + drivers/cpufreq/ls1x-cpufreq.c | 223 +++++++++++++++++++++++++++++++++ 3 files changed, 234 insertions(+) create mode 100644 drivers/cpufreq/ls1x-cpufreq.c diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 73df7dbc250a..4de4dfae4ccc 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -249,6 +249,16 @@ config LOONGSON2_CPUFREQ If in doubt, say N. +config LOONGSON1_CPUFREQ + tristate "Loongson1 CPUFreq Driver" + help + This option adds a CPUFreq driver for loongson1 processors which + support software configurable cpu frequency. + + For details, take a look at . + + If in doubt, say N. + endmenu menu "PowerPC CPU frequency scaling drivers" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 40c53dc1937e..215e447abec6 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -98,6 +98,7 @@ obj-$(CONFIG_CRIS_MACH_ARTPEC3) += cris-artpec3-cpufreq.o obj-$(CONFIG_ETRAXFS) += cris-etraxfs-cpufreq.o obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o +obj-$(CONFIG_LOONGSON1_CPUFREQ) += ls1x-cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o diff --git a/drivers/cpufreq/ls1x-cpufreq.c b/drivers/cpufreq/ls1x-cpufreq.c new file mode 100644 index 000000000000..25fbd6a1374f --- /dev/null +++ b/drivers/cpufreq/ls1x-cpufreq.c @@ -0,0 +1,223 @@ +/* + * CPU Frequency Scaling for Loongson 1 SoC + * + * Copyright (C) 2014 Zhang, Keguang + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static struct { + struct device *dev; + struct clk *clk; /* CPU clk */ + struct clk *mux_clk; /* MUX of CPU clk */ + struct clk *pll_clk; /* PLL clk */ + struct clk *osc_clk; /* OSC clk */ + unsigned int max_freq; + unsigned int min_freq; +} ls1x_cpufreq; + +static int ls1x_cpufreq_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + if (val == CPUFREQ_POSTCHANGE) + current_cpu_data.udelay_val = loops_per_jiffy; + + return NOTIFY_OK; +} + +static struct notifier_block ls1x_cpufreq_notifier_block = { + .notifier_call = ls1x_cpufreq_notifier +}; + +static int ls1x_cpufreq_target(struct cpufreq_policy *policy, + unsigned int index) +{ + unsigned int old_freq, new_freq; + + old_freq = policy->cur; + new_freq = policy->freq_table[index].frequency; + + /* + * The procedure of reconfiguring CPU clk is as below. + * + * - Reparent CPU clk to OSC clk + * - Reset CPU clock (very important) + * - Reconfigure CPU DIV + * - Reparent CPU clk back to CPU DIV clk + */ + + dev_dbg(ls1x_cpufreq.dev, "%u KHz --> %u KHz\n", old_freq, new_freq); + clk_set_parent(policy->clk, ls1x_cpufreq.osc_clk); + __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) | RST_CPU_EN | RST_CPU, + LS1X_CLK_PLL_DIV); + __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) & ~(RST_CPU_EN | RST_CPU), + LS1X_CLK_PLL_DIV); + clk_set_rate(ls1x_cpufreq.mux_clk, new_freq * 1000); + clk_set_parent(policy->clk, ls1x_cpufreq.mux_clk); + + return 0; +} + +static int ls1x_cpufreq_init(struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *freq_tbl; + unsigned int pll_freq, freq; + int steps, i, ret; + + pll_freq = clk_get_rate(ls1x_cpufreq.pll_clk) / 1000; + + steps = 1 << DIV_CPU_WIDTH; + freq_tbl = kzalloc(sizeof(*freq_tbl) * steps, GFP_KERNEL); + if (!freq_tbl) { + dev_err(ls1x_cpufreq.dev, + "failed to alloc cpufreq_frequency_table\n"); + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < (steps - 1); i++) { + freq = pll_freq / (i + 1); + if ((freq < ls1x_cpufreq.min_freq) || + (freq > ls1x_cpufreq.max_freq)) + freq_tbl[i].frequency = CPUFREQ_ENTRY_INVALID; + else + freq_tbl[i].frequency = freq; + dev_dbg(ls1x_cpufreq.dev, + "cpufreq table: index %d: frequency %d\n", i, + freq_tbl[i].frequency); + } + freq_tbl[i].frequency = CPUFREQ_TABLE_END; + + policy->clk = ls1x_cpufreq.clk; + ret = cpufreq_generic_init(policy, freq_tbl, 0); + if (ret) + kfree(freq_tbl); +out: + return ret; +} + +static int ls1x_cpufreq_exit(struct cpufreq_policy *policy) +{ + kfree(policy->freq_table); + return 0; +} + +static struct cpufreq_driver ls1x_cpufreq_driver = { + .name = "cpufreq-ls1x", + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = ls1x_cpufreq_target, + .get = cpufreq_generic_get, + .init = ls1x_cpufreq_init, + .exit = ls1x_cpufreq_exit, + .attr = cpufreq_generic_attr, +}; + +static int ls1x_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_notifier(&ls1x_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + cpufreq_unregister_driver(&ls1x_cpufreq_driver); + + return 0; +} + +static int ls1x_cpufreq_probe(struct platform_device *pdev) +{ + struct plat_ls1x_cpufreq *pdata = pdev->dev.platform_data; + struct clk *clk; + int ret; + + if (!pdata || !pdata->clk_name || !pdata->osc_clk_name) + return -EINVAL; + + ls1x_cpufreq.dev = &pdev->dev; + + clk = devm_clk_get(&pdev->dev, pdata->clk_name); + if (IS_ERR(clk)) { + dev_err(ls1x_cpufreq.dev, "unable to get %s clock\n", + pdata->clk_name); + ret = PTR_ERR(clk); + goto out; + } + ls1x_cpufreq.clk = clk; + + clk = clk_get_parent(clk); + if (IS_ERR(clk)) { + dev_err(ls1x_cpufreq.dev, "unable to get parent of %s clock\n", + __clk_get_name(ls1x_cpufreq.clk)); + ret = PTR_ERR(clk); + goto out; + } + ls1x_cpufreq.mux_clk = clk; + + clk = clk_get_parent(clk); + if (IS_ERR(clk)) { + dev_err(ls1x_cpufreq.dev, "unable to get parent of %s clock\n", + __clk_get_name(ls1x_cpufreq.mux_clk)); + ret = PTR_ERR(clk); + goto out; + } + ls1x_cpufreq.pll_clk = clk; + + clk = devm_clk_get(&pdev->dev, pdata->osc_clk_name); + if (IS_ERR(clk)) { + dev_err(ls1x_cpufreq.dev, "unable to get %s clock\n", + pdata->osc_clk_name); + ret = PTR_ERR(clk); + goto out; + } + ls1x_cpufreq.osc_clk = clk; + + ls1x_cpufreq.max_freq = pdata->max_freq; + ls1x_cpufreq.min_freq = pdata->min_freq; + + ret = cpufreq_register_driver(&ls1x_cpufreq_driver); + if (ret) { + dev_err(ls1x_cpufreq.dev, + "failed to register cpufreq driver: %d\n", ret); + goto out; + } + + ret = cpufreq_register_notifier(&ls1x_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + + if (!ret) + goto out; + + dev_err(ls1x_cpufreq.dev, "failed to register cpufreq notifier: %d\n", + ret); + + cpufreq_unregister_driver(&ls1x_cpufreq_driver); +out: + return ret; +} + +static struct platform_driver ls1x_cpufreq_platdrv = { + .driver = { + .name = "ls1x-cpufreq", + .owner = THIS_MODULE, + }, + .probe = ls1x_cpufreq_probe, + .remove = ls1x_cpufreq_remove, +}; + +module_platform_driver(ls1x_cpufreq_platdrv); + +MODULE_AUTHOR("Kelvin Cheung "); +MODULE_DESCRIPTION("Loongson 1 CPUFreq driver"); +MODULE_LICENSE("GPL"); From 0a1e879d353f6f6e661d14c6a0e42dd19efbc504 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 17 Oct 2014 22:09:48 +0000 Subject: [PATCH 03/21] cpufreq: cpufreq-dt: Improve debug about matching OPP During test of new DT OPPs it's very helpful to print the matching OPP in case of frequency change. So it will be easier to find frequency rounding issues in the dts file. Signed-off-by: Stefan Wahren Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index f657c571b18e..725fb1d86578 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -58,6 +58,8 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) old_freq = clk_get_rate(cpu_clk) / 1000; if (!IS_ERR(cpu_reg)) { + unsigned long opp_freq; + rcu_read_lock(); opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz); if (IS_ERR(opp)) { @@ -67,9 +69,12 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) return PTR_ERR(opp); } volt = dev_pm_opp_get_voltage(opp); + opp_freq = dev_pm_opp_get_freq(opp); rcu_read_unlock(); tol = volt * priv->voltage_tolerance / 100; volt_old = regulator_get_voltage(cpu_reg); + dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n", + opp_freq / 1000, volt); } dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n", From 8197bb1bffbee6a7678d20b4c1e77940cdaa8640 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 17 Oct 2014 22:09:49 +0000 Subject: [PATCH 04/21] cpufreq: cpufreq-dt: Handle regulator_get_voltage() failure In error cases regulator_get_voltage() returns a negative value. So take care of it. Signed-off-by: Stefan Wahren Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 725fb1d86578..8cba13df5f28 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -78,7 +78,7 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) } dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n", - old_freq / 1000, volt_old ? volt_old / 1000 : -1, + old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, new_freq / 1000, volt ? volt / 1000 : -1); /* scaling up? scale voltage before frequency */ @@ -94,7 +94,7 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) ret = clk_set_rate(cpu_clk, freq_exact); if (ret) { dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - if (!IS_ERR(cpu_reg)) + if (!IS_ERR(cpu_reg) && volt_old > 0) regulator_set_voltage_tol(cpu_reg, volt_old, tol); return ret; } From 619c144c84bd240487204e91dff88247cde68d92 Mon Sep 17 00:00:00 2001 From: Vince Hsu Date: Mon, 10 Nov 2014 14:14:50 +0800 Subject: [PATCH 05/21] cpufreq: respect the min/max settings from user space When the user space tries to set scaling_(max|min)_freq through sysfs, the cpufreq_set_policy() asks other driver's opinions for the max/min frequencies. Some device drivers, like Tegra CPU EDP which is not upstreamed yet though, may constrain the CPU maximum frequency dynamically because of board design. So if the user space access happens and some driver is capping the cpu frequency at the same time, the user_policy->(max|min) is overridden by the capped value, and that's not expected by the user space. And if the user space is not invoked again, the CPU will always be capped by the user_policy->(max|min) even no drivers limit the CPU frequency any more. This patch preserves the user specified min/max settings, so that every time the cpufreq policy is updated, the new max/min can be re-evaluated correctly based on the user's expection and the present device drivers' status. Signed-off-by: Vince Hsu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 644b54e1e7d1..0721ab352e2a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -535,7 +535,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, static ssize_t store_##file_name \ (struct cpufreq_policy *policy, const char *buf, size_t count) \ { \ - int ret; \ + int ret, temp; \ struct cpufreq_policy new_policy; \ \ ret = cpufreq_get_policy(&new_policy, policy->cpu); \ @@ -546,8 +546,10 @@ static ssize_t store_##file_name \ if (ret != 1) \ return -EINVAL; \ \ + temp = new_policy.object; \ ret = cpufreq_set_policy(policy, &new_policy); \ - policy->user_policy.object = policy->object; \ + if (!ret) \ + policy->user_policy.object = temp; \ \ return ret ? ret : count; \ } From 77873887729aaddec5cd27203a6ce8c4987733e4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Thu, 6 Nov 2014 09:40:46 -0800 Subject: [PATCH 06/21] x86: Add support for Intel HWP feature detection. Add support of Hardware Managed Performance States (HWP) described in Volume 3 section 14.4 of the SDM. One bit CPUID.06H:EAX[bit 7] expresses the presence of the HWP feature on the processor. The remaining bits CPUID.06H:EAX[bit 8-11] denote the presense of various HWP features. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/cpufeature.h | 5 +++++ arch/x86/kernel/cpu/scattered.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0bb1335313b2..aede2c347bde 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -189,6 +189,11 @@ #define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */ +#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ +#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ +#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ +#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 4a8013d55947..60639093d536 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -36,6 +36,11 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, + { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 }, + { X86_FEATURE_HWP_NOITFY, CR_EAX, 8, 0x00000006, 0 }, + { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 }, + { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 }, + { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, From 2f86dc4cddcb21290ca099e1dce2a53533c86e0b Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Thu, 6 Nov 2014 09:40:47 -0800 Subject: [PATCH 07/21] intel_pstate: Add support for HWP Add support of Hardware Managed Performance States (HWP) described in Volume 3 section 14.4 of the SDM. With HWP enbaled intel_pstate will no longer be responsible for selecting P states for the processor. intel_pstate will continue to register to the cpufreq core as the scaling driver for CPUs implementing HWP. In HWP mode intel_pstate provides three functions reporting frequency to the cpufreq core, support for the set_policy() interface from the core and maintaining the intel_pstate sysfs interface in /sys/devices/system/cpu/intel_pstate. User preferences expressed via the set_policy() interface or the sysfs interface are forwared to the CPU via the HWP MSR interface. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/intel-pstate.txt | 33 +++++--- Documentation/kernel-parameters.txt | 3 + arch/x86/include/uapi/asm/msr-index.h | 41 ++++++++++ drivers/cpufreq/intel_pstate.c | 100 +++++++++++++++++++++++- 4 files changed, 165 insertions(+), 12 deletions(-) diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index a69ffe1d54d5..765d7fc0e692 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -1,17 +1,28 @@ Intel P-state driver -------------------- -This driver implements a scaling driver with an internal governor for -Intel Core processors. The driver follows the same model as the -Transmeta scaling driver (longrun.c) and implements the setpolicy() -instead of target(). Scaling drivers that implement setpolicy() are -assumed to implement internal governors by the cpufreq core. All the -logic for selecting the current P state is contained within the -driver; no external governor is used by the cpufreq core. +This driver provides an interface to control the P state selection for +SandyBridge+ Intel processors. The driver can operate two different +modes based on the processor model legacy and Hardware P state (HWP) +mode. -Intel SandyBridge+ processors are supported. +In legacy mode the driver implements a scaling driver with an internal +governor for Intel Core processors. The driver follows the same model +as the Transmeta scaling driver (longrun.c) and implements the +setpolicy() instead of target(). Scaling drivers that implement +setpolicy() are assumed to implement internal governors by the cpufreq +core. All the logic for selecting the current P state is contained +within the driver; no external governor is used by the cpufreq core. -New sysfs files for controlling P state selection have been added to +In HWP mode P state selection is implemented in the processor +itself. The driver provides the interfaces between the cpufreq core and +the processor to control P state selection based on user preferences +and reporting frequency to the cpufreq core. In this mode the +internal governor code is disabled. + +In addtion to the interfaces provided by the cpufreq core for +controlling frequency the driver provides sysfs files for +controlling P state selection. These files have been added to /sys/devices/system/cpu/intel_pstate/ max_perf_pct: limits the maximum P state that will be requested by @@ -33,7 +44,9 @@ frequency is fiction for Intel Core processors. Even if the scaling driver selects a single P state the actual frequency the processor will run at is selected by the processor itself. -New debugfs files have also been added to /sys/kernel/debug/pstate_snb/ +For legacy mode debugfs files have also been added to allow tuning of +the internal governor algorythm. These files are located at +/sys/kernel/debug/pstate_snb/ These files are NOT present in HWP mode. deadband d_gain_pct diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4c81a860cc2b..907a0f119bee 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1446,6 +1446,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. disable Do not enable intel_pstate as the default scaling driver for the supported processors + no_hwp + Do not enable hardware P state control (HWP) + if available. intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index e21331ce368f..62838e54947d 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -152,6 +152,45 @@ #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (x & 0xff) +#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) +#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) +#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24) +#define HWP_ACTIVITY_WINDOW(x) ((x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + #define MSR_AMD64_MC0_MASK 0xc0010044 #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) @@ -345,6 +384,8 @@ #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 +#define MSR_MISC_PWR_MGMT 0x000001aa + #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 #define ENERGY_PERF_BIAS_PERFORMANCE 0 #define ENERGY_PERF_BIAS_NORMAL 6 diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 27bb6d3877ed..ba35db092239 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -137,6 +137,7 @@ struct cpu_defaults { static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; +static int hwp_active; struct perf_limits { int no_turbo; @@ -244,6 +245,34 @@ static inline void update_turbo_state(void) cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } +#define PCT_TO_HWP(x) (x * 255 / 100) +static void intel_pstate_hwp_set(void) +{ + int min, max, cpu; + u64 value, freq; + + get_online_cpus(); + + for_each_online_cpu(cpu) { + rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); + min = PCT_TO_HWP(limits.min_perf_pct); + value &= ~HWP_MIN_PERF(~0L); + value |= HWP_MIN_PERF(min); + + max = PCT_TO_HWP(limits.max_perf_pct); + if (limits.no_turbo) { + rdmsrl( MSR_HWP_CAPABILITIES, freq); + max = HWP_GUARANTEED_PERF(freq); + } + + value &= ~HWP_MAX_PERF(~0L); + value |= HWP_MAX_PERF(max); + wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); + } + + put_online_cpus(); +} + /************************** debugfs begin ************************/ static int pid_param_set(void *data, u64 val) { @@ -279,6 +308,8 @@ static void __init intel_pstate_debug_expose_params(void) struct dentry *debugfs_parent; int i = 0; + if (hwp_active) + return; debugfs_parent = debugfs_create_dir("pstate_snb", NULL); if (IS_ERR_OR_NULL(debugfs_parent)) return; @@ -329,8 +360,12 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); return -EPERM; } + limits.no_turbo = clamp_t(int, input, 0, 1); + if (hwp_active) + intel_pstate_hwp_set(); + return count; } @@ -348,6 +383,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + if (hwp_active) + intel_pstate_hwp_set(); return count; } @@ -363,6 +400,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits.min_perf_pct = clamp_t(int, input, 0 , 100); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + if (hwp_active) + intel_pstate_hwp_set(); return count; } @@ -395,8 +434,16 @@ static void __init intel_pstate_sysfs_expose_params(void) rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group); BUG_ON(rc); } - /************************** sysfs end ************************/ + +static void intel_pstate_hwp_enable(void) +{ + hwp_active++; + pr_info("intel_pstate HWP enabled\n"); + + wrmsrl( MSR_PM_ENABLE, 0x1); +} + static int byt_get_min_pstate(void) { u64 value; @@ -648,6 +695,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu) cpu->prev_mperf = mperf; } +static inline void intel_hwp_set_sample_time(struct cpudata *cpu) +{ + int delay; + + delay = msecs_to_jiffies(50); + mod_timer_pinned(&cpu->timer, jiffies + delay); +} + static inline void intel_pstate_set_sample_time(struct cpudata *cpu) { int delay; @@ -694,6 +749,14 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl); } +static void intel_hwp_timer_func(unsigned long __data) +{ + struct cpudata *cpu = (struct cpudata *) __data; + + intel_pstate_sample(cpu); + intel_hwp_set_sample_time(cpu); +} + static void intel_pstate_timer_func(unsigned long __data) { struct cpudata *cpu = (struct cpudata *) __data; @@ -737,6 +800,11 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); +static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = { + ICPU(0x56, core_params), + {} +}; + static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -753,9 +821,14 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_get_cpu_pstates(cpu); init_timer_deferrable(&cpu->timer); - cpu->timer.function = intel_pstate_timer_func; cpu->timer.data = (unsigned long)cpu; cpu->timer.expires = jiffies + HZ/100; + + if (!hwp_active) + cpu->timer.function = intel_pstate_timer_func; + else + cpu->timer.function = intel_hwp_timer_func; + intel_pstate_busy_pid_reset(cpu); intel_pstate_sample(cpu); @@ -792,6 +865,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.no_turbo = 0; return 0; } + limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); @@ -801,6 +875,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + if (hwp_active) + intel_pstate_hwp_set(); + return 0; } @@ -823,6 +900,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_info("intel_pstate CPU %d exiting\n", cpu_num); del_timer_sync(&all_cpu_data[cpu_num]->timer); + if (hwp_active) + return; + intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); } @@ -866,6 +946,7 @@ static struct cpufreq_driver intel_pstate_driver = { }; static int __initdata no_load; +static int __initdata no_hwp; static int intel_pstate_msrs_not_valid(void) { @@ -959,6 +1040,15 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void) { struct acpi_table_header hdr; struct hw_vendor_info *v_info; + const struct x86_cpu_id *id; + u64 misc_pwr; + + id = x86_match_cpu(intel_pstate_cpu_oob_ids); + if (id) { + rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr); + if ( misc_pwr & (1 << 8)) + return true; + } if (acpi_disabled || ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr))) @@ -982,6 +1072,7 @@ static int __init intel_pstate_init(void) int cpu, rc = 0; const struct x86_cpu_id *id; struct cpu_defaults *cpu_info; + struct cpuinfo_x86 *c = &boot_cpu_data; if (no_load) return -ENODEV; @@ -1011,6 +1102,9 @@ static int __init intel_pstate_init(void) if (!all_cpu_data) return -ENOMEM; + if (cpu_has(c,X86_FEATURE_HWP) && !no_hwp) + intel_pstate_hwp_enable(); + rc = cpufreq_register_driver(&intel_pstate_driver); if (rc) goto out; @@ -1041,6 +1135,8 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; + if (!strcmp(str, "no_hwp")) + no_hwp = 1; return 0; } early_param("intel_pstate", intel_pstate_setup); From 43f8a966e91f387eabe85d2f2d12519c218f9dd0 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Thu, 6 Nov 2014 09:50:45 -0800 Subject: [PATCH 08/21] intel_pstate: Add CPUID for BDW-H CPU Add BDW-H to the list of supported processors. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ba35db092239..ab2e100a1807 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -793,6 +793,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x3f, core_params), ICPU(0x45, core_params), ICPU(0x46, core_params), + ICPU(0x47, core_params), ICPU(0x4c, byt_params), ICPU(0x4f, core_params), ICPU(0x56, core_params), From 7e7e8fe69820c6fa31395dbbd8e348e3c69cd2a9 Mon Sep 17 00:00:00 2001 From: Lenny Szubowicz Date: Thu, 13 Nov 2014 13:51:52 -0500 Subject: [PATCH 09/21] cpufreq: pcc: Enable autoload of pcc-cpufreq for ACPI processors The pcc-cpufreq driver is not automatically loaded on systems where the platform's power management setting requires this driver. Instead, on those systems no CPU frequency driver is registered and active. Make the autoloading matching criteria for loading the pcc-cpufreq driver the same as done in acpi-cpufreq by commit c655affbd524d01 ("ACPI / cpufreq: Add ACPI processor device IDs to acpi-cpufreq"). x86 CPU frequency drivers are now typically autoloaded by specifying MODULE_DEVICE_TABLE entries and x86cpu model specific matching. But pcc-cpufreq was omitted when acpi-cpufreq and other drivers were changed to use this approach. Both acpi-cpufreq and pcc-cpufreq depend on a distinct and mutually exclusive set of ACPI methods which are not directly tied to specific processor model numbers. Both of these drivers have init routines which look for their required ACPI methods. As a result, only the appropriate driver registers as the cpu frequency driver and the other one ends up being unloaded. Tested on various systems where acpi-cpufreq, intel_pstate, and pcc-cpufreq are the expected cpu frequency drivers. Signed-off-by: Lenny Szubowicz Signed-off-by: Joseph Szczypek Reported-by: Trinh Dao Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pcc-cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 4d2c8e861089..2a0d58959acf 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -603,6 +603,13 @@ static void __exit pcc_cpufreq_exit(void) free_percpu(pcc_cpu_info); } +static const struct acpi_device_id processor_device_ids[] = { + {ACPI_PROCESSOR_OBJECT_HID, }, + {ACPI_PROCESSOR_DEVICE_HID, }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, processor_device_ids); + MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar"); MODULE_VERSION(PCC_VERSION); MODULE_DESCRIPTION("Processor Clocking Control interface driver"); From f41f4815f8e81e8745fca8396d842adb74689c88 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 15 Nov 2014 08:50:44 +0530 Subject: [PATCH 10/21] cpufreq: Kconfig: Remove architecture specific menu entries CPUFreq driver's Kconfig entries are added in Kconfig. files and they are all included from the main Kconfig file using a menu entry. This creates another level of (unnecessary) hierarchy within the menuconfig entries. The problem occurs when there are drivers usable across architectures. Either their config entry is duplicated in all the supported architectures or is put into the main Kconfig entry. With the later one, we have menuconfig entries for drivers at two levels then. Fix these issues by getting rid of another level of menuconfig hierarchy and populate all drivers within the main cpufreq menu. To clearly distinguish where the drivers start from, also add a comment that will appear in menuconfig. Reported-by: Tang Yuantian Suggested-by: Scott Wood Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 50 ++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 4de4dfae4ccc..29b2ef5a68b9 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -182,6 +182,8 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. +comment "CPU frequency scaling drivers" + config CPUFREQ_DT tristate "Generic DT based cpufreq driver" depends on HAVE_CLK && OF @@ -195,19 +197,19 @@ config CPUFREQ_DT If in doubt, say N. -menu "x86 CPU frequency scaling drivers" -depends on X86 +if X86 source "drivers/cpufreq/Kconfig.x86" -endmenu +endif -menu "ARM CPU frequency scaling drivers" -depends on ARM || ARM64 +if ARM || ARM64 source "drivers/cpufreq/Kconfig.arm" -endmenu +endif -menu "AVR32 CPU frequency scaling drivers" -depends on AVR32 +if PPC32 || PPC64 +source "drivers/cpufreq/Kconfig.powerpc" +endif +if AVR32 config AVR32_AT32AP_CPUFREQ bool "CPU frequency driver for AT32AP" depends on PLATFORM_AT32AP @@ -215,12 +217,9 @@ config AVR32_AT32AP_CPUFREQ help This enables the CPU frequency driver for AT32AP processors. If in doubt, say N. +endif -endmenu - -menu "CPUFreq processor drivers" -depends on IA64 - +if IA64 config IA64_ACPI_CPUFREQ tristate "ACPI Processor P-States driver" depends on ACPI_PROCESSOR @@ -231,12 +230,9 @@ config IA64_ACPI_CPUFREQ For details, take a look at . If in doubt, say N. +endif -endmenu - -menu "MIPS CPUFreq processor drivers" -depends on MIPS - +if MIPS config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" help @@ -258,16 +254,9 @@ config LOONGSON1_CPUFREQ For details, take a look at . If in doubt, say N. +endif -endmenu - -menu "PowerPC CPU frequency scaling drivers" -depends on PPC32 || PPC64 -source "drivers/cpufreq/Kconfig.powerpc" -endmenu - -menu "SPARC CPU frequency scaling drivers" -depends on SPARC64 +if SPARC64 config SPARC_US3_CPUFREQ tristate "UltraSPARC-III CPU Frequency driver" help @@ -285,10 +274,9 @@ config SPARC_US2E_CPUFREQ For details, take a look at . If in doubt, say N. -endmenu +endif -menu "SH CPU Frequency scaling" -depends on SUPERH +if SUPERH config SH_CPU_FREQ tristate "SuperH CPU Frequency driver" help @@ -302,7 +290,7 @@ config SH_CPU_FREQ For details, take a look at . If unsure, say N. -endmenu +endif endif endmenu From 6d4e81ed89c092cb156c8d19cb68c8733cd502b3 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 24 Nov 2014 10:08:03 +0100 Subject: [PATCH 11/21] cpufreq: Ref the policy object sooner Do it before it's assigned to cpufreq_cpu_data, otherwise when a driver tries to get the cpu frequency during initialization the policy kobj is referenced and we get this warning: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 64 at include/linux/kref.h:47 kobject_get+0x64/0x70() Modules linked in: CPU: 1 PID: 64 Comm: irq/77-tegra-ac Not tainted 3.18.0-rc4-next-20141114ccu-00050-g3eff942 #326 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xd8) [] (dump_stack) from [] (warn_slowpath_common+0x84/0xb4) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (kobject_get+0x64/0x70) [] (kobject_get) from [] (cpufreq_cpu_get+0x88/0xc8) [] (cpufreq_cpu_get) from [] (cpufreq_get+0xc/0x64) [] (cpufreq_get) from [] (actmon_thread_isr+0x134/0x198) [] (actmon_thread_isr) from [] (irq_thread_fn+0x1c/0x40) [] (irq_thread_fn) from [] (irq_thread+0x134/0x174) [] (irq_thread) from [] (kthread+0xdc/0xf4) [] (kthread) from [] (ret_from_fork+0x14/0x3c) ---[ end trace b7bd64a81b340c59 ]--- Signed-off-by: Tomeu Vizoso Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 42 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c9701e9e53e4..de2c3e198b62 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -900,46 +900,31 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, struct freq_attr **drv_attr; int ret = 0; - /* prepare interface data */ - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, - &dev->kobj, "cpufreq"); - if (ret) - return ret; - /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; while ((drv_attr) && (*drv_attr)) { ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); if (ret) - goto err_out_kobj_put; + return ret; drv_attr++; } if (cpufreq_driver->get) { ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); if (ret) - goto err_out_kobj_put; + return ret; } ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); if (ret) - goto err_out_kobj_put; + return ret; if (cpufreq_driver->bios_limit) { ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); if (ret) - goto err_out_kobj_put; + return ret; } - ret = cpufreq_add_dev_symlink(policy); - if (ret) - goto err_out_kobj_put; - - return ret; - -err_out_kobj_put: - kobject_put(&policy->kobj); - wait_for_completion(&policy->kobj_unregister); - return ret; + return cpufreq_add_dev_symlink(policy); } static void cpufreq_init_policy(struct cpufreq_policy *policy) @@ -1198,6 +1183,8 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) goto err_set_policy_cpu; } + down_write(&policy->rwsem); + /* related cpus should atleast have policy->cpus */ cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); @@ -1210,9 +1197,17 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (!recover_policy) { policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; + + /* prepare interface data */ + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + &dev->kobj, "cpufreq"); + if (ret) { + pr_err("%s: failed to init policy->kobj: %d\n", + __func__, ret); + goto err_init_policy_kobj; + } } - down_write(&policy->rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) per_cpu(cpufreq_cpu_data, j) = policy; @@ -1303,6 +1298,11 @@ err_get_freq: per_cpu(cpufreq_cpu_data, j) = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + if (!recover_policy) { + kobject_put(&policy->kobj); + wait_for_completion(&policy->kobj_unregister); + } +err_init_policy_kobj: up_write(&policy->rwsem); if (cpufreq_driver->exit) From 608eab228205a42b58441f16cbf127ac3a40d24b Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Mon, 24 Nov 2014 04:59:26 +0100 Subject: [PATCH 12/21] cpufreq: pxa2xx: Add Kconfig entry Add ability for PXA2xx CPUFreq to be compiled as a module or not at all. Signed-off-by: Petr Cvek Acked-by: Viresh Kumar [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 8 ++++++++ drivers/cpufreq/Makefile | 3 +-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 83a75dc84761..0f9a2c3c0e0d 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -247,3 +247,11 @@ config ARM_TEGRA_CPUFREQ default y help This adds the CPUFreq driver support for TEGRA SOCs. + +config ARM_PXA2xx_CPUFREQ + tristate "Intel PXA2xx CPUfreq driver" + depends on PXA27x || PXA25x + help + This add the CPUFreq driver support for Intel PXA2xx SOCs. + + If in doubt, say N. diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 215e447abec6..b3ca7b0b2c33 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -61,8 +61,7 @@ obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o -obj-$(CONFIG_PXA25x) += pxa2xx-cpufreq.o -obj-$(CONFIG_PXA27x) += pxa2xx-cpufreq.o +obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o obj-$(CONFIG_ARM_S3C24XX_CPUFREQ) += s3c24xx-cpufreq.o obj-$(CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS) += s3c24xx-cpufreq-debugfs.o From 90452e61137a3e88aa705d3efcb3874f3ce8d390 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Nov 2014 06:07:49 +0530 Subject: [PATCH 13/21] cpufreq: Fix formatting issues in 'struct cpufreq_driver' Adding any new callback to 'struct cpufreq_driver' gives following checkpatch warning: WARNING: Unnecessary space before function pointer arguments + void (*ready) (struct cpufreq_policy *policy); This is because we have been using a tab spacing between function pointer name and its arguments and the new one tried to follow that. Though we normally don't try to fix every checkpatch warning, specially around formatting issues as that creates unnecessary noise over lists. But I thought we better fix this so that new additions don't generate these warnings plus it looks far better/symmetric now. So, remove these tab spacing issues in 'struct cpufreq_driver' only + fix alignment of all members. Signed-off-by: Viresh Kumar Reviewed-by: Eduardo Valentin Tested-by: Eduardo Valentin Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 50 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 503b085b7832..db3c13085671 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -217,26 +217,26 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { - char name[CPUFREQ_NAME_LEN]; - u8 flags; - void *driver_data; + char name[CPUFREQ_NAME_LEN]; + u8 flags; + void *driver_data; /* needed by all drivers */ - int (*init) (struct cpufreq_policy *policy); - int (*verify) (struct cpufreq_policy *policy); + int (*init)(struct cpufreq_policy *policy); + int (*verify)(struct cpufreq_policy *policy); /* define one out of two */ - int (*setpolicy) (struct cpufreq_policy *policy); + int (*setpolicy)(struct cpufreq_policy *policy); /* * On failure, should always restore frequency to policy->restore_freq * (i.e. old freq). */ - int (*target) (struct cpufreq_policy *policy, /* Deprecated */ - unsigned int target_freq, - unsigned int relation); - int (*target_index) (struct cpufreq_policy *policy, - unsigned int index); + int (*target)(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation); /* Deprecated */ + int (*target_index)(struct cpufreq_policy *policy, + unsigned int index); /* * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION * unset. @@ -252,27 +252,27 @@ struct cpufreq_driver { * wish to switch to intermediate frequency for some target frequency. * In that case core will directly call ->target_index(). */ - unsigned int (*get_intermediate)(struct cpufreq_policy *policy, - unsigned int index); - int (*target_intermediate)(struct cpufreq_policy *policy, - unsigned int index); + unsigned int (*get_intermediate)(struct cpufreq_policy *policy, + unsigned int index); + int (*target_intermediate)(struct cpufreq_policy *policy, + unsigned int index); /* should be defined, if possible */ - unsigned int (*get) (unsigned int cpu); + unsigned int (*get)(unsigned int cpu); /* optional */ - int (*bios_limit) (int cpu, unsigned int *limit); + int (*bios_limit)(int cpu, unsigned int *limit); - int (*exit) (struct cpufreq_policy *policy); - void (*stop_cpu) (struct cpufreq_policy *policy); - int (*suspend) (struct cpufreq_policy *policy); - int (*resume) (struct cpufreq_policy *policy); - struct freq_attr **attr; + int (*exit)(struct cpufreq_policy *policy); + void (*stop_cpu)(struct cpufreq_policy *policy); + int (*suspend)(struct cpufreq_policy *policy); + int (*resume)(struct cpufreq_policy *policy); + struct freq_attr **attr; /* platform specific boost support code */ - bool boost_supported; - bool boost_enabled; - int (*set_boost) (int state); + bool boost_supported; + bool boost_enabled; + int (*set_boost)(int state); }; /* flags */ From d3a10c14755a5929eaf43cea760bc307ee7e96bc Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Nov 2014 06:07:50 +0530 Subject: [PATCH 14/21] cpufreq-dt: pass 'policy->related_cpus' to of_cpufreq_cooling_register() The second parameter of of_cpufreq_cooling_register() should be the CPUs to which the frequency constraint will apply. As the cpufreq-dt driver now supports platforms with multiple 'struct cpufreq_policy' instances (i.e. > 1 clock domains for CPUs), passing 'cpu_present_mask' isn't correct anymore. As every policy will have a set of CPUs and that may not be equal to 'cpu_present_mask' always. So, pass only mask of CPUs which are controlled by current policy. Signed-off-by: Viresh Kumar Reviewed-by: Eduardo Valentin Tested-by: Eduardo Valentin Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 8cba13df5f28..7374fc4b6bb3 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -274,7 +274,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * thermal DT code takes care of matching them. */ if (of_find_property(np, "#cooling-cells", NULL)) { - cdev = of_cpufreq_cooling_register(np, cpu_present_mask); + cdev = of_cpufreq_cooling_register(np, policy->related_cpus); if (IS_ERR(cdev)) dev_err(cpu_dev, "running cpufreq without cooling device: %ld\n", From 7c45cf31b3ab9be270a7bf6af2926631dc566436 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Nov 2014 06:07:51 +0530 Subject: [PATCH 15/21] cpufreq: Introduce ->ready() callback for cpufreq drivers Currently there is no callback for cpufreq drivers which is called once the policy is ready to be used. There are some requirements where such a callback is required. One of them is registering a cooling device with the help of of_cpufreq_cooling_register(). This routine tries to get 'struct cpufreq_policy' for CPUs which isn't yet initialed at the time ->init() is called and so we face issues while registering the cooling device. Because we can't register cooling device from ->init(), we need a callback that is called after the policy is ready to be used and hence we introduce ->ready() callback. Signed-off-by: Viresh Kumar Reviewed-by: Eduardo Valentin Tested-by: Eduardo Valentin Reviewed-by: Lukasz Majewski Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++++ include/linux/cpufreq.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index de2c3e198b62..a09a29c312a9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1285,8 +1285,13 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) up_write(&policy->rwsem); kobject_uevent(&policy->kobj, KOBJ_ADD); + up_read(&cpufreq_rwsem); + /* Callback for handling stuff after policy is ready */ + if (cpufreq_driver->ready) + cpufreq_driver->ready(policy); + pr_debug("initialization complete\n"); return 0; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index db3c13085671..4d078cebafd2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -267,6 +267,10 @@ struct cpufreq_driver { void (*stop_cpu)(struct cpufreq_policy *policy); int (*suspend)(struct cpufreq_policy *policy); int (*resume)(struct cpufreq_policy *policy); + + /* Will be called after the driver is fully initialized */ + void (*ready)(struct cpufreq_policy *policy); + struct freq_attr **attr; /* platform specific boost support code */ From 9a004428d77f1571c883b993d77ec64767b1959a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Nov 2014 06:07:52 +0530 Subject: [PATCH 16/21] cpufreq-dt: register cooling device from ->ready() callback Currently we are calling of_cpufreq_cooling_register() from ->init() callback. At this point of time cpufreq driver's policy isn't completely ready to be used as few of its fields/structure/pointers aren't yet initialized. Because of_cpufreq_cooling_register() tries to access policy with help of cpufreq_cpu_get() and then tries to get freq-table as well, these calls fail. To fix this, register the cooling device after the policy is ready to be used. And the right callback for it is the newly added ->ready() one. Signed-off-by: Viresh Kumar Reviewed-by: Eduardo Valentin Tested-by: Eduardo Valentin Reviewed-by: Lukasz Majewski Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 51 ++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 7374fc4b6bb3..e720954244b1 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -186,7 +186,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_dt_platform_data *pd; struct cpufreq_frequency_table *freq_table; - struct thermal_cooling_device *cdev; struct device_node *np; struct private_data *priv; struct device *cpu_dev; @@ -269,20 +268,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_free_priv; } - /* - * For now, just loading the cooling device; - * thermal DT code takes care of matching them. - */ - if (of_find_property(np, "#cooling-cells", NULL)) { - cdev = of_cpufreq_cooling_register(np, policy->related_cpus); - if (IS_ERR(cdev)) - dev_err(cpu_dev, - "running cpufreq without cooling device: %ld\n", - PTR_ERR(cdev)); - else - priv->cdev = cdev; - } - priv->cpu_dev = cpu_dev; priv->cpu_reg = cpu_reg; policy->driver_data = priv; @@ -292,7 +277,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret) { dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__, ret); - goto out_cooling_unregister; + goto out_free_cpufreq_table; } policy->cpuinfo.transition_latency = transition_latency; @@ -305,8 +290,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) return 0; -out_cooling_unregister: - cpufreq_cooling_unregister(priv->cdev); +out_free_cpufreq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); out_free_priv: kfree(priv); @@ -324,7 +308,8 @@ static int cpufreq_exit(struct cpufreq_policy *policy) { struct private_data *priv = policy->driver_data; - cpufreq_cooling_unregister(priv->cdev); + if (priv->cdev) + cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); clk_put(policy->clk); if (!IS_ERR(priv->cpu_reg)) @@ -334,6 +319,33 @@ static int cpufreq_exit(struct cpufreq_policy *policy) return 0; } +static void cpufreq_ready(struct cpufreq_policy *policy) +{ + struct private_data *priv = policy->driver_data; + struct device_node *np = of_node_get(priv->cpu_dev->of_node); + + if (WARN_ON(!np)) + return; + + /* + * For now, just loading the cooling device; + * thermal DT code takes care of matching them. + */ + if (of_find_property(np, "#cooling-cells", NULL)) { + priv->cdev = of_cpufreq_cooling_register(np, + policy->related_cpus); + if (IS_ERR(priv->cdev)) { + dev_err(priv->cpu_dev, + "running cpufreq without cooling device: %ld\n", + PTR_ERR(priv->cdev)); + + priv->cdev = NULL; + } + } + + of_node_put(np); +} + static struct cpufreq_driver dt_cpufreq_driver = { .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, @@ -341,6 +353,7 @@ static struct cpufreq_driver dt_cpufreq_driver = { .get = cpufreq_generic_get, .init = cpufreq_init, .exit = cpufreq_exit, + .ready = cpufreq_ready, .name = "cpufreq-dt", .attr = cpufreq_generic_attr, }; From 2f0f609f2e3b874e8acf895459939903e6574d9c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Nov 2014 16:04:21 +0530 Subject: [PATCH 17/21] cpufreq-dt: free OPP table created during ->init() OPP layer now supports freeing of OPPs and we should free them once they aren't useful anymore. Signed-off-by: Viresh Kumar Tested-by: Stefan Wahren Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index e720954244b1..9bc2720628a4 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -214,7 +214,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { ret = -ENOMEM; - goto out_put_node; + goto out_free_opp; } of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); @@ -294,7 +294,8 @@ out_free_cpufreq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); out_free_priv: kfree(priv); -out_put_node: +out_free_opp: + of_free_opp_table(cpu_dev); of_node_put(np); out_put_reg_clk: clk_put(cpu_clk); @@ -311,6 +312,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) if (priv->cdev) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); + of_free_opp_table(priv->cpu_dev); clk_put(policy->clk); if (!IS_ERR(priv->cpu_reg)) regulator_put(priv->cpu_reg); From c6104fdbb2f83ad7696220e14fee54e14935f04b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Nov 2014 16:04:22 +0530 Subject: [PATCH 18/21] exynos5440: free OPP table created during ->init() OPP layer now supports freeing of OPPs and we should free them once they aren't useful anymore. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/exynos5440-cpufreq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c index f33f25b483ca..27a57ed9eb2c 100644 --- a/drivers/cpufreq/exynos5440-cpufreq.c +++ b/drivers/cpufreq/exynos5440-cpufreq.c @@ -371,7 +371,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) if (ret) { dev_err(dvfs_info->dev, "failed to init cpufreq table: %d\n", ret); - goto err_put_node; + goto err_free_opp; } dvfs_info->freq_count = dev_pm_opp_get_opp_count(dvfs_info->dev); exynos_sort_descend_freq_table(); @@ -423,6 +423,8 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) err_free_table: dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); +err_free_opp: + of_free_opp_table(dvfs_info->dev); err_put_node: of_node_put(np); dev_err(&pdev->dev, "%s: failed initialization\n", __func__); @@ -433,6 +435,7 @@ static int exynos_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&exynos_driver); dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); + of_free_opp_table(dvfs_info->dev); return 0; } From cc87b8a8e94d840863334151f3525fd286c9ae23 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Nov 2014 16:04:23 +0530 Subject: [PATCH 19/21] imx6q: free OPP table created during ->init() OPP layer now supports freeing of OPPs and we should free them once they aren't useful anymore. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/imx6q-cpufreq.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index c2d30765bf3d..5da1d131f770 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -31,6 +31,7 @@ static struct clk *step_clk; static struct clk *pll2_pfd2_396m_clk; static struct device *cpu_dev; +static bool free_opp; static struct cpufreq_frequency_table *freq_table; static unsigned int transition_latency; @@ -207,11 +208,14 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_reg; } + /* Because we have added the OPPs here, we must free them */ + free_opp = true; + num = dev_pm_opp_get_opp_count(cpu_dev); if (num < 0) { ret = num; dev_err(cpu_dev, "no OPP table is found: %d\n", ret); - goto put_reg; + goto out_free_opp; } } @@ -306,6 +310,9 @@ soc_opp_out: free_freq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); +out_free_opp: + if (free_opp) + of_free_opp_table(cpu_dev); put_reg: if (!IS_ERR(arm_reg)) regulator_put(arm_reg); @@ -332,6 +339,8 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&imx6q_cpufreq_driver); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); + if (free_opp) + of_free_opp_table(cpu_dev); regulator_put(arm_reg); if (!IS_ERR(pu_reg)) regulator_put(pu_reg); From 493b4cd285e68d8d8d5853a2536ea06f8cabeaeb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Nov 2014 16:04:20 +0530 Subject: [PATCH 20/21] cpufreq: arm_big_little: free OPP table created during ->init() OPP layer now supports freeing of OPPs and we should free them once they aren't useful anymore. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little.c | 7 ++++++- drivers/cpufreq/arm_big_little.h | 5 ++++- drivers/cpufreq/arm_big_little_dt.c | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index a46c223c2506..e1a6ba66a7f5 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -289,6 +289,8 @@ static void _put_cluster_clk_and_freq_table(struct device *cpu_dev) clk_put(clk[cluster]); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]); + if (arm_bL_ops->free_opp_table) + arm_bL_ops->free_opp_table(cpu_dev); dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster); } @@ -337,7 +339,7 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev) if (ret) { dev_err(cpu_dev, "%s: failed to init cpufreq table, cpu: %d, err: %d\n", __func__, cpu_dev->id, ret); - goto out; + goto free_opp_table; } name[12] = cluster + '0'; @@ -354,6 +356,9 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev) ret = PTR_ERR(clk[cluster]); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]); +free_opp_table: + if (arm_bL_ops->free_opp_table) + arm_bL_ops->free_opp_table(cpu_dev); out: dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__, cluster); diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index 70f18fc12d4a..a211f7db9d32 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -25,13 +25,16 @@ struct cpufreq_arm_bL_ops { char name[CPUFREQ_NAME_LEN]; - int (*get_transition_latency)(struct device *cpu_dev); /* * This must set opp table for cpu_dev in a similar way as done by * of_init_opp_table(). */ int (*init_opp_table)(struct device *cpu_dev); + + /* Optional */ + int (*get_transition_latency)(struct device *cpu_dev); + void (*free_opp_table)(struct device *cpu_dev); }; int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops); diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c index 4550f6976768..ef0b3f1324d5 100644 --- a/drivers/cpufreq/arm_big_little_dt.c +++ b/drivers/cpufreq/arm_big_little_dt.c @@ -82,6 +82,7 @@ static struct cpufreq_arm_bL_ops dt_bL_ops = { .name = "dt-bl", .get_transition_latency = dt_get_transition_latency, .init_opp_table = dt_init_opp_table, + .free_opp_table = of_free_opp_table, }; static int generic_bL_probe(struct platform_device *pdev) From 966916eabfb1726fec7ea7b69f0c7f5ce366e943 Mon Sep 17 00:00:00 2001 From: ethan zhao Date: Mon, 1 Dec 2014 11:32:08 +0900 Subject: [PATCH 21/21] intel_pstate: skip this driver if Sun server has _PPC method Oracle Sun X86 servers have dynamic power capping capability that works via ACPI _PPC method etc, so skip loading this driver if Sun server has ACPI _PPC enabled. Signed-off-by: Ethan Zhao Tested-by: Linda Knippers Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 45 +++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ab2e100a1807..1405b393c93d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1025,15 +1025,46 @@ static bool intel_pstate_no_acpi_pss(void) return true; } +static bool intel_pstate_has_acpi_ppc(void) +{ + int i; + + for_each_possible_cpu(i) { + struct acpi_processor *pr = per_cpu(processors, i); + + if (!pr) + continue; + if (acpi_has_method(pr->handle, "_PPC")) + return true; + } + return false; +} + +enum { + PSS, + PPC, +}; + struct hw_vendor_info { u16 valid; char oem_id[ACPI_OEM_ID_SIZE]; char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; + int oem_pwr_table; }; /* Hardware vendor-specific info that has its own power management modes */ static struct hw_vendor_info vendor_info[] = { - {1, "HP ", "ProLiant"}, + {1, "HP ", "ProLiant", PSS}, + {1, "ORACLE", "X4-2 ", PPC}, + {1, "ORACLE", "X4-2L ", PPC}, + {1, "ORACLE", "X4-2B ", PPC}, + {1, "ORACLE", "X3-2 ", PPC}, + {1, "ORACLE", "X3-2L ", PPC}, + {1, "ORACLE", "X3-2B ", PPC}, + {1, "ORACLE", "X4470M2 ", PPC}, + {1, "ORACLE", "X4270M3 ", PPC}, + {1, "ORACLE", "X4270M2 ", PPC}, + {1, "ORACLE", "X4170M2 ", PPC}, {0, "", ""}, }; @@ -1057,15 +1088,21 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void) for (v_info = vendor_info; v_info->valid; v_info++) { if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) && - !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && - intel_pstate_no_acpi_pss()) - return true; + !strncmp(hdr.oem_table_id, v_info->oem_table_id, + ACPI_OEM_TABLE_ID_SIZE)) + switch (v_info->oem_pwr_table) { + case PSS: + return intel_pstate_no_acpi_pss(); + case PPC: + return intel_pstate_has_acpi_ppc(); + } } return false; } #else /* CONFIG_ACPI not enabled */ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } +static inline bool intel_pstate_has_acpi_ppc(void) { return false; } #endif /* CONFIG_ACPI */ static int __init intel_pstate_init(void)