From 7813dd6fc75fb375d4caf002e7f80a826fc3153a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 26 Sep 2017 15:12:40 -0700 Subject: [PATCH 1/9] PM / OPP: Move the OPP directory out of power/ The drivers/base/power/ directory is special and contains code related to power management core like system suspend/resume, hibernation, etc. It was fine to keep the OPP code inside it when we had just one file for it, but it is growing now and already has a directory for itself. Lets move it directly under drivers/ directory, just like cpufreq and cpuidle. Signed-off-by: Viresh Kumar Acked-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 2 +- drivers/Kconfig | 2 ++ drivers/Makefile | 1 + drivers/base/power/Makefile | 1 - drivers/opp/Kconfig | 13 +++++++++++++ drivers/{base/power => }/opp/Makefile | 0 drivers/{base/power => }/opp/core.c | 0 drivers/{base/power => }/opp/cpu.c | 0 drivers/{base/power => }/opp/debugfs.c | 0 drivers/{base/power => }/opp/of.c | 0 drivers/{base/power => }/opp/opp.h | 0 kernel/power/Kconfig | 14 -------------- 12 files changed, 17 insertions(+), 16 deletions(-) create mode 100644 drivers/opp/Kconfig rename drivers/{base/power => }/opp/Makefile (100%) rename drivers/{base/power => }/opp/core.c (100%) rename drivers/{base/power => }/opp/cpu.c (100%) rename drivers/{base/power => }/opp/debugfs.c (100%) rename drivers/{base/power => }/opp/of.c (100%) rename drivers/{base/power => }/opp/opp.h (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 65b0c88d5ee0..7c8c649fc68b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10043,7 +10043,7 @@ M: Stephen Boyd L: linux-pm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git -F: drivers/base/power/opp/ +F: drivers/opp/ F: include/linux/pm_opp.h F: Documentation/power/opp.txt F: Documentation/devicetree/bindings/opp/ diff --git a/drivers/Kconfig b/drivers/Kconfig index 505c676fa9c7..9e264d410c23 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -208,4 +208,6 @@ source "drivers/tee/Kconfig" source "drivers/mux/Kconfig" +source "drivers/opp/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index d90fdc413648..dd718a3007e9 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -125,6 +125,7 @@ obj-$(CONFIG_ACCESSIBILITY) += accessibility/ obj-$(CONFIG_ISDN) += isdn/ obj-$(CONFIG_EDAC) += edac/ obj-$(CONFIG_EISA) += eisa/ +obj-$(CONFIG_PM_OPP) += opp/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ obj-y += mmc/ diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 5998c53280f5..73a1cffc0a5f 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,7 +1,6 @@ obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o qos.o runtime.o wakeirq.o obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o -obj-$(CONFIG_PM_OPP) += opp/ obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o domain_governor.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o diff --git a/drivers/opp/Kconfig b/drivers/opp/Kconfig new file mode 100644 index 000000000000..a7fbb93f302c --- /dev/null +++ b/drivers/opp/Kconfig @@ -0,0 +1,13 @@ +config PM_OPP + bool + select SRCU + ---help--- + SOCs have a standard set of tuples consisting of frequency and + voltage pairs that the device will support per voltage domain. This + is called Operating Performance Point or OPP. The actual definitions + of OPP varies over silicon within the same family of devices. + + OPP layer organizes the data internally using device pointers + representing individual voltage domains and provides SOC + implementations a ready to use framework to manage OPPs. + For more information, read diff --git a/drivers/base/power/opp/Makefile b/drivers/opp/Makefile similarity index 100% rename from drivers/base/power/opp/Makefile rename to drivers/opp/Makefile diff --git a/drivers/base/power/opp/core.c b/drivers/opp/core.c similarity index 100% rename from drivers/base/power/opp/core.c rename to drivers/opp/core.c diff --git a/drivers/base/power/opp/cpu.c b/drivers/opp/cpu.c similarity index 100% rename from drivers/base/power/opp/cpu.c rename to drivers/opp/cpu.c diff --git a/drivers/base/power/opp/debugfs.c b/drivers/opp/debugfs.c similarity index 100% rename from drivers/base/power/opp/debugfs.c rename to drivers/opp/debugfs.c diff --git a/drivers/base/power/opp/of.c b/drivers/opp/of.c similarity index 100% rename from drivers/base/power/opp/of.c rename to drivers/opp/of.c diff --git a/drivers/base/power/opp/opp.h b/drivers/opp/opp.h similarity index 100% rename from drivers/base/power/opp/opp.h rename to drivers/opp/opp.h diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index e8517b63eb37..e880ca22c5a5 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -259,20 +259,6 @@ config APM_EMULATION anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). -config PM_OPP - bool - select SRCU - ---help--- - SOCs have a standard set of tuples consisting of frequency and - voltage pairs that the device will support per voltage domain. This - is called Operating Performance Point or OPP. The actual definitions - of OPP varies over silicon within the same family of devices. - - OPP layer organizes the data internally using device pointers - representing individual voltage domains and provides SOC - implementations a ready to use framework to manage OPPs. - For more information, read - config PM_CLK def_bool y depends on PM && HAVE_CLK From d741029a2390406d4d94279ae5b346831a9e61e6 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 21 Sep 2017 11:15:36 +0530 Subject: [PATCH 2/9] PM / OPP: Use snprintf() to avoid kasprintf() and kfree() Use snprintf() to avoid unnecessary initializations, avoid calling kfree(). Signed-off-by: Arvind Yadav Acked-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/opp/debugfs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 81cf120fcf43..9318848f3c67 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -41,16 +41,15 @@ static bool opp_debug_create_supplies(struct dev_pm_opp *opp, { struct dentry *d; int i; - char *name; for (i = 0; i < opp_table->regulator_count; i++) { - name = kasprintf(GFP_KERNEL, "supply-%d", i); + char name[15]; + + snprintf(name, sizeof(name), "supply-%d", i); /* Create per-opp directory */ d = debugfs_create_dir(name, pdentry); - kfree(name); - if (!d) return false; From 035ed07208dc501d023873447113f3f178592156 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 29 Sep 2017 14:39:49 -0300 Subject: [PATCH 3/9] PM / OPP: Move error message to debug level On some i.MX6 platforms which do not have speed grading check, opp table will not be created in platform code, so cpufreq driver prints the following error message: cpu cpu0: dev_pm_opp_get_opp_count: OPP table not found (-19) However, this is not really an error in this case because the imx6q-cpufreq driver first calls dev_pm_opp_get_opp_count() and if it fails, it means that platform code does not provide OPP and then dev_pm_opp_of_add_table() will be called. In order to avoid such confusing error message, move it to debug level. It is up to the caller of dev_pm_opp_get_opp_count() to check its return value and decide if it will print an error or not. Signed-off-by: Fabio Estevam Signed-off-by: Rafael J. Wysocki --- drivers/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index a6de32530693..0459b1204694 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -296,7 +296,7 @@ int dev_pm_opp_get_opp_count(struct device *dev) opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { count = PTR_ERR(opp_table); - dev_err(dev, "%s: OPP table not found (%d)\n", + dev_dbg(dev, "%s: OPP table not found (%d)\n", __func__, count); return count; } From 7978db344719dab1e56d05e6fc04aaaddcde0a5e Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Wed, 4 Oct 2017 11:35:03 +0530 Subject: [PATCH 4/9] PM / OPP: Add missing of_node_put(np) The for_each_available_child_of_node() loop in _of_add_opp_table_v2() doesn't drop the reference to "np" on errors. Fix that. Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings) Cc: 4.3+ # 4.3+ Signed-off-by: Tobias Jordan [ VK: Improved commit log. ] Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/opp/of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 0b718886479b..87509cb69f79 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -397,6 +397,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) dev_err(dev, "%s: Failed to add OPP, %d\n", __func__, ret); _dev_pm_opp_remove_table(opp_table, dev, false); + of_node_put(np); goto put_opp_table; } } From 604a7aeb4325b8ecb23df163c89fc12248302a4e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Oct 2017 17:26:21 +0530 Subject: [PATCH 5/9] PM / OPP: Rename dev_pm_opp_register_put_opp_helper() The routine is named incorrectly since the first attempt as there is nothing like a put_opp() helper. We wanted to unregister the set_opp() helper here and so it should rather be named as dev_pm_opp_unregister_set_opp_helper(). Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/opp/core.c | 6 +++--- include/linux/pm_opp.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0459b1204694..80c21207e48c 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1476,13 +1476,13 @@ err: EXPORT_SYMBOL_GPL(dev_pm_opp_register_set_opp_helper); /** - * dev_pm_opp_register_put_opp_helper() - Releases resources blocked for + * dev_pm_opp_unregister_set_opp_helper() - Releases resources blocked for * set_opp helper * @opp_table: OPP table returned from dev_pm_opp_register_set_opp_helper(). * * Release resources blocked for platform specific set_opp helper. */ -void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table) +void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) { if (!opp_table->set_opp) { pr_err("%s: Doesn't have custom set_opp helper set\n", @@ -1497,7 +1497,7 @@ void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table) dev_pm_opp_put_opp_table(opp_table); } -EXPORT_SYMBOL_GPL(dev_pm_opp_register_put_opp_helper); +EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper); /** * dev_pm_opp_add() - Add an OPP table from a table definitions diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 51ec727b4824..849d21dc4ca7 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -124,7 +124,7 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); -void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table); +void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -243,7 +243,7 @@ static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table) {} +static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {} static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { From 9867999f3a85b52f96ef05fca00cc8128eed01ce Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 12 Oct 2017 11:32:23 +0100 Subject: [PATCH 6/9] PM / OPP: add missing of_node_put() for of_get_cpu_node() Commit 762792913f8c (PM / OPP: Fix get sharing CPUs when hotplug is used) moved away from using cpu_dev->of_node because of some limitations. However, commit 7467c9d95989 (of: return of_get_cpu_node from of_cpu_device_node_get if CPUs are not registered) added support to fall back to of_get_cpu_node() if called if CPUs are not registered yet. Add the missing of_node_put() for the CPU device nodes. Also go back to using of_cpu_device_node_get() in dev_pm_opp_of_get_sharing_cpus() to avoid scanning the device tree again. Acked-by: Viresh Kumar Fixes: 762792913f8c (PM / OPP: Fix get sharing CPUs when hotplug is used) Signed-off-by: Sudeep Holla Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/opp/of.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 87509cb69f79..cb716aa2f44b 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -604,7 +604,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, if (cpu == cpu_dev->id) continue; - cpu_np = of_get_cpu_node(cpu, NULL); + cpu_np = of_cpu_device_node_get(cpu); if (!cpu_np) { dev_err(cpu_dev, "%s: failed to get cpu%d node\n", __func__, cpu); @@ -614,6 +614,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, /* Get OPP descriptor node */ tmp_np = _opp_of_get_opp_desc_node(cpu_np); + of_node_put(cpu_np); if (!tmp_np) { pr_err("%pOF: Couldn't find opp node\n", cpu_np); ret = -ENOENT; From 009acd196fc860045bf7b2c3f5812f0f5efb2782 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Oct 2017 12:54:14 +0530 Subject: [PATCH 7/9] PM / OPP: Support updating performance state of device's power domain The genpd framework now provides an API to request device's power domain to update its performance state. Use that interface from the OPP core for devices whose power domains support performance states. Note that this commit doesn't add any mechanism by which performance states are made available to the OPP core. That would be done by a later commit. Note that the current implementation is restricted to the case where the device doesn't have separate regulators for itself. We shouldn't over engineer the code before we have real use case for them. We can always come back and add more code to support such cases later on. Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/opp/core.c | 57 ++++++++++++++++++++++++++++++++++++++++++- drivers/opp/debugfs.c | 3 +++ drivers/opp/opp.h | 4 +++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 80c21207e48c..0ce8069d6843 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "opp.h" @@ -535,6 +536,44 @@ _generic_set_opp_clk_only(struct device *dev, struct clk *clk, return ret; } +static inline int +_generic_set_opp_domain(struct device *dev, struct clk *clk, + unsigned long old_freq, unsigned long freq, + unsigned int old_pstate, unsigned int new_pstate) +{ + int ret; + + /* Scaling up? Scale domain performance state before frequency */ + if (freq > old_freq) { + ret = dev_pm_genpd_set_performance_state(dev, new_pstate); + if (ret) + return ret; + } + + ret = _generic_set_opp_clk_only(dev, clk, old_freq, freq); + if (ret) + goto restore_domain_state; + + /* Scaling down? Scale domain performance state after frequency */ + if (freq < old_freq) { + ret = dev_pm_genpd_set_performance_state(dev, new_pstate); + if (ret) + goto restore_freq; + } + + return 0; + +restore_freq: + if (_generic_set_opp_clk_only(dev, clk, freq, old_freq)) + dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", + __func__, old_freq); +restore_domain_state: + if (freq > old_freq) + dev_pm_genpd_set_performance_state(dev, old_pstate); + + return ret; +} + static int _generic_set_opp_regulator(const struct opp_table *opp_table, struct device *dev, unsigned long old_freq, @@ -653,7 +692,16 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) /* Only frequency scaling */ if (!opp_table->regulators) { - ret = _generic_set_opp_clk_only(dev, clk, old_freq, freq); + /* + * We don't support devices with both regulator and + * domain performance-state for now. + */ + if (opp_table->genpd_performance_state) + ret = _generic_set_opp_domain(dev, clk, old_freq, freq, + IS_ERR(old_opp) ? 0 : old_opp->pstate, + opp->pstate); + else + ret = _generic_set_opp_clk_only(dev, clk, old_freq, freq); } else if (!opp_table->set_opp) { ret = _generic_set_opp_regulator(opp_table, dev, old_freq, freq, IS_ERR(old_opp) ? NULL : old_opp->supplies, @@ -1706,6 +1754,13 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, if (remove_all || !opp->dynamic) dev_pm_opp_put(opp); } + + /* + * The OPP table is getting removed, drop the performance state + * constraints. + */ + if (opp_table->genpd_performance_state) + dev_pm_genpd_set_performance_state(dev, 0); } else { _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table); } diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 9318848f3c67..b03c03576a62 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -99,6 +99,9 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) if (!debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend)) return -ENOMEM; + if (!debugfs_create_u32("performance_state", S_IRUGO, d, &opp->pstate)) + return -ENOMEM; + if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate)) return -ENOMEM; diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 166eef990599..e8f767ab5814 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -58,6 +58,7 @@ extern struct list_head opp_tables; * @dynamic: not-created from static DT entries. * @turbo: true if turbo (boost) OPP * @suspend: true if suspend OPP + * @pstate: Device's power domain's performance state. * @rate: Frequency in hertz * @supplies: Power supplies voltage/current values * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's @@ -76,6 +77,7 @@ struct dev_pm_opp { bool dynamic; bool turbo; bool suspend; + unsigned int pstate; unsigned long rate; struct dev_pm_opp_supply *supplies; @@ -135,6 +137,7 @@ enum opp_table_access { * @clk: Device's clock handle * @regulators: Supply regulators * @regulator_count: Number of power supply regulators + * @genpd_performance_state: Device's power domain support performance state. * @set_opp: Platform specific set_opp callback * @set_opp_data: Data to be passed to set_opp callback * @dentry: debugfs dentry pointer of the real device directory (not links). @@ -170,6 +173,7 @@ struct opp_table { struct clk *clk; struct regulator **regulators; unsigned int regulator_count; + bool genpd_performance_state; int (*set_opp)(struct dev_pm_set_opp_data *data); struct dev_pm_set_opp_data *set_opp_data; From b6aa98364f842f943495408895627702ad7ad44b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Oct 2017 12:54:15 +0530 Subject: [PATCH 8/9] PM / OPP: Add dev_pm_opp_{un}register_get_pstate_helper() This adds the dev_pm_opp_{un}register_get_pstate_helper() helper routines which will be used to set the get_pstate() callback for a device. This callback will be later called internally by the OPP core to get performance state corresponding to an OPP. This is required temporarily until the time we have proper DT bindings to include the performance state information. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/opp/core.c | 78 ++++++++++++++++++++++++++++++++++++++++++ drivers/opp/opp.h | 2 ++ include/linux/pm_opp.h | 10 ++++++ 3 files changed, 90 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0ce8069d6843..92fa94a6dcc1 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1036,6 +1036,9 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, return ret; } + if (opp_table->get_pstate) + new_opp->pstate = opp_table->get_pstate(dev, new_opp->rate); + list_add(&new_opp->node, head); mutex_unlock(&opp_table->lock); @@ -1547,6 +1550,81 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) } EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper); +/** + * dev_pm_opp_register_get_pstate_helper() - Register get_pstate() helper. + * @dev: Device for which the helper is getting registered. + * @get_pstate: Helper. + * + * TODO: Remove this callback after the same information is available via Device + * Tree. + * + * This allows a platform to initialize the performance states of individual + * OPPs for its devices, until we get similar information directly from DT. + * + * This must be called before the OPPs are initialized for the device. + */ +struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, + int (*get_pstate)(struct device *dev, unsigned long rate)) +{ + struct opp_table *opp_table; + int ret; + + if (!get_pstate) + return ERR_PTR(-EINVAL); + + opp_table = dev_pm_opp_get_opp_table(dev); + if (!opp_table) + return ERR_PTR(-ENOMEM); + + /* This should be called before OPPs are initialized */ + if (WARN_ON(!list_empty(&opp_table->opp_list))) { + ret = -EBUSY; + goto err; + } + + /* Already have genpd_performance_state set */ + if (WARN_ON(opp_table->genpd_performance_state)) { + ret = -EBUSY; + goto err; + } + + opp_table->genpd_performance_state = true; + opp_table->get_pstate = get_pstate; + + return opp_table; + +err: + dev_pm_opp_put_opp_table(opp_table); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_register_get_pstate_helper); + +/** + * dev_pm_opp_unregister_get_pstate_helper() - Releases resources blocked for + * get_pstate() helper + * @opp_table: OPP table returned from dev_pm_opp_register_get_pstate_helper(). + * + * Release resources blocked for platform specific get_pstate() helper. + */ +void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table) +{ + if (!opp_table->genpd_performance_state) { + pr_err("%s: Doesn't have performance states set\n", + __func__); + return; + } + + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); + + opp_table->genpd_performance_state = false; + opp_table->get_pstate = NULL; + + dev_pm_opp_put_opp_table(opp_table); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_get_pstate_helper); + /** * dev_pm_opp_add() - Add an OPP table from a table definitions * @dev: device for which we do this operation diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index e8f767ab5814..4d00061648a3 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -140,6 +140,7 @@ enum opp_table_access { * @genpd_performance_state: Device's power domain support performance state. * @set_opp: Platform specific set_opp callback * @set_opp_data: Data to be passed to set_opp callback + * @get_pstate: Platform specific get_pstate callback * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -177,6 +178,7 @@ struct opp_table { int (*set_opp)(struct dev_pm_set_opp_data *data); struct dev_pm_set_opp_data *set_opp_data; + int (*get_pstate)(struct device *dev, unsigned long rate); #ifdef CONFIG_DEBUG_FS struct dentry *dentry; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 849d21dc4ca7..6c2d2e88f066 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -125,6 +125,8 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, int (*get_pstate)(struct device *dev, unsigned long rate)); +void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -245,6 +247,14 @@ static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {} +static inline struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, + int (*get_pstate)(struct device *dev, unsigned long rate)) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table) {} + static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { return ERR_PTR(-ENOTSUPP); From 07458f6a5171d97511dfbdf6ce549ed2ca0280c7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 8 Nov 2017 20:23:55 +0530 Subject: [PATCH 9/9] cpufreq: schedutil: Reset cached_raw_freq when not in sync with next_freq 'cached_raw_freq' is used to get the next frequency quickly but should always be in sync with sg_policy->next_freq. There is a case where it is not and in such cases it should be reset to avoid switching to incorrect frequencies. Consider this case for example: - policy->cur is 1.2 GHz (Max) - New request comes for 780 MHz and we store that in cached_raw_freq. - Based on 780 MHz, we calculate the effective frequency as 800 MHz. - We then see the CPU wasn't idle recently and choose to keep the next freq as 1.2 GHz. - Now we have cached_raw_freq is 780 MHz and sg_policy->next_freq is 1.2 GHz. - Now if the utilization doesn't change in then next request, then the next target frequency will still be 780 MHz and it will match with cached_raw_freq. But we will choose 1.2 GHz instead of 800 MHz here. Fixes: b7eaf1aab9f8 (cpufreq: schedutil: Avoid reducing frequency of busy CPUs prematurely) Signed-off-by: Viresh Kumar Cc: 4.12+ # 4.12+ Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index ba0da243fdd8..2f52ec0f1539 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -282,8 +282,12 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. */ - if (busy && next_f < sg_policy->next_freq) + if (busy && next_f < sg_policy->next_freq) { next_f = sg_policy->next_freq; + + /* Reset cached freq as next_freq has changed */ + sg_policy->cached_raw_freq = 0; + } } sugov_update_commit(sg_policy, time, next_f); }