From de1df26b7cef702a32ae876ed45c1112f523df48 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Feb 2016 02:37:42 +0100 Subject: [PATCH 01/94] cpufreq: Clean up default and fallback governor setup The preprocessor magic used for setting the default cpufreq governor (and for using the performance governor as a fallback one for that matter) is really nasty, so replace it with __weak functions and overrides. Signed-off-by: Rafael J. Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 37 +++++++++++++++----------- drivers/cpufreq/cpufreq_conservative.c | 10 ++++--- drivers/cpufreq/cpufreq_ondemand.c | 36 ++++++++++++------------- drivers/cpufreq/cpufreq_performance.c | 18 ++++++++++--- drivers/cpufreq/cpufreq_powersave.c | 10 ++++--- drivers/cpufreq/cpufreq_userspace.c | 10 ++++--- include/linux/cpufreq.h | 25 ++--------------- 7 files changed, 73 insertions(+), 73 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e979ec78b695..34b17447e0d1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -959,6 +959,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return cpufreq_add_dev_symlink(policy); } +__weak struct cpufreq_governor *cpufreq_default_governor(void) +{ + return NULL; +} + static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; @@ -968,11 +973,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) /* Update governor of new_policy to the governor used before hotplug */ gov = find_governor(policy->last_governor); - if (gov) + if (gov) { pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); - else - gov = CPUFREQ_DEFAULT_GOVERNOR; + } else { + gov = cpufreq_default_governor(); + if (!gov) + return -ENODATA; + } new_policy.governor = gov; @@ -1920,21 +1928,16 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, } 
EXPORT_SYMBOL_GPL(cpufreq_driver_target); +__weak struct cpufreq_governor *cpufreq_fallback_governor(void) +{ + return NULL; +} + static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { int ret; - /* Only must be defined when default governor is known to have latency - restrictions, like e.g. conservative or ondemand. - That this is the case is already ensured in Kconfig - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE - struct cpufreq_governor *gov = &cpufreq_gov_performance; -#else - struct cpufreq_governor *gov = NULL; -#endif - /* Don't start any governor operations if we are entering suspend */ if (cpufreq_suspended) return 0; @@ -1948,12 +1951,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, if (policy->governor->max_transition_latency && policy->cpuinfo.transition_latency > policy->governor->max_transition_latency) { - if (!gov) - return -EINVAL; - else { + struct cpufreq_governor *gov = cpufreq_fallback_governor(); + + if (gov) { pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", policy->governor->name, gov->name); policy->governor = gov; + } else { + return -EINVAL; } } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 606ad74abe6e..8504a70a4785 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -26,10 +26,7 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE -static -#endif -struct cpufreq_governor cpufreq_gov_conservative = { +static struct cpufreq_governor cpufreq_gov_conservative = { .name = "conservative", .governor = cs_cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, @@ -399,6 +396,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " MODULE_LICENSE("GPL"); #ifdef 
CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_conservative; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index eae51070c034..929e193ac1c1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -31,9 +31,7 @@ static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); static struct od_ops od_ops; -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND static struct cpufreq_governor cpufreq_gov_ondemand; -#endif static unsigned int default_powersave_bias; @@ -554,6 +552,19 @@ static struct common_dbs_data od_dbs_cdata = { .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), }; +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); +} + +static struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + static void od_set_powersave_bias(unsigned int powersave_bias) { struct cpufreq_policy *policy; @@ -605,22 +616,6 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); -} - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = od_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { return cpufreq_register_governor(&cpufreq_gov_ondemand); @@ -638,6 +633,11 @@ MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq 
governor for " MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_ondemand; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index cf117deb39b1..af9f4b96f5a8 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, return 0; } -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE -static -#endif -struct cpufreq_governor cpufreq_gov_performance = { +static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .governor = cpufreq_governor_performance, .owner = THIS_MODULE, @@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void) cpufreq_unregister_governor(&cpufreq_gov_performance); } +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif +#ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +struct cpufreq_governor *cpufreq_fallback_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif + MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index e3b874c235ea..b8b400232a74 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE -static -#endif -struct cpufreq_governor cpufreq_gov_powersave = { +static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, @@ -57,6 +54,11 @@ 
MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_powersave; +} + fs_initcall(cpufreq_gov_powersave_init); #else module_init(cpufreq_gov_powersave_init); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 4dbf1db16aca..4d16f45ee1da 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, return rc; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE -static -#endif -struct cpufreq_governor cpufreq_gov_userspace = { +static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, .store_setspeed = cpufreq_set, @@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_userspace; +} + fs_initcall(cpufreq_gov_userspace_init); #else module_init(cpufreq_gov_userspace_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 88a4215125bc..d0bf555b6bbf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -464,29 +464,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); -/* CPUFREQ DEFAULT GOVERNOR */ -/* - * Performance governor is fallback governor if any other gov failed to auto - * load due latency restrictions - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE -extern struct cpufreq_governor cpufreq_gov_performance; -#endif -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) -#elif 
defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) -extern struct cpufreq_governor cpufreq_gov_powersave; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) -extern struct cpufreq_governor cpufreq_gov_userspace; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) -extern struct cpufreq_governor cpufreq_gov_ondemand; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) -extern struct cpufreq_governor cpufreq_gov_conservative; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) -#endif +struct cpufreq_governor *cpufreq_default_governor(void); +struct cpufreq_governor *cpufreq_fallback_governor(void); /********************************************************************* * FREQUENCY TABLE HELPERS * From 86622cb8c57abb05fe95bea3a068949c0ca79fc3 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Wed, 3 Feb 2016 01:11:37 +0530 Subject: [PATCH 02/94] cpufreq: powernv: Free 'chips' on module exit This will free the dynamically allocated memory of 'chips' on module exit. Signed-off-by: Shilpasri G Bhat Reviewed-by: Gautham R. Shenoy Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 547890fd9572..53f980bf9b77 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -612,6 +612,7 @@ static void __exit powernv_cpufreq_exit(void) unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); opal_message_notifier_unregister(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); + kfree(chips); cpufreq_unregister_driver(&powernv_cpufreq_driver); } module_exit(powernv_cpufreq_exit); From 6d167a44e6c8da3316e037b788585fcf96112bea Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Wed, 3 Feb 2016 01:11:38 +0530 Subject: [PATCH 03/94] cpufreq: powernv: Hot-plug safe the kworker thread In the kworker_thread powernv_cpufreq_work_fn(), we can end up sending an IPI to a cpu going offline. This is a rare corner case which is fixed using {get/put}_online_cpus(). Along with this fix, this patch adds changes to do oneshot cpumask_{clear/and} operation. Suggested-by: Shreyas B Prabhu Suggested-by: Gautham R Shenoy Signed-off-by: Shilpasri G Bhat Reviewed-by: Gautham R. Shenoy Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 53f980bf9b77..a271b0fbe8b9 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -423,18 +424,19 @@ void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); unsigned int cpu; - cpumask_var_t mask; + cpumask_t mask; - smp_call_function_any(&chip->mask, + get_online_cpus(); + cpumask_and(&mask, &chip->mask, cpu_online_mask); + smp_call_function_any(&mask, powernv_cpufreq_throttle_check, NULL, 0); if (!chip->restore) - return; + goto out; chip->restore = false; - cpumask_copy(mask, &chip->mask); - for_each_cpu_and(cpu, mask, cpu_online_mask) { - int index, tcpu; + for_each_cpu(cpu, &mask) { + int index; struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); @@ -442,9 +444,10 @@ void powernv_cpufreq_work_fn(struct work_struct *work) policy.cur, CPUFREQ_RELATION_C, &index); powernv_cpufreq_target_index(&policy, index); - for_each_cpu(tcpu, policy.cpus) - cpumask_clear_cpu(tcpu, mask); + cpumask_andnot(&mask, &mask, policy.cpus); } +out: + put_online_cpus(); } static char throttle_reason[][30] = { From 96c4726f01cdbf53acf74cf2394e287d74bf40a3 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Wed, 3 Feb 2016 01:11:39 +0530 Subject: [PATCH 04/94] cpufreq: powernv: Remove cpu_to_chip_id() from hot-path cpu_to_chip_id() does a DT walk through to find out the chip id by taking a contended device tree lock. This adds an unnecessary overhead in a hot path. So instead of calling cpu_to_chip_id() everytime cache the chip ids for all cores in the array 'core_to_chip_map' and use it in the hotpath. Reported-by: Anton Blanchard Signed-off-by: Shilpasri G Bhat Reviewed-by: Gautham R. 
Shenoy Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index a271b0fbe8b9..c670314053af 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -43,6 +43,7 @@ static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static bool rebooting, throttled, occ_reset; +static unsigned int *core_to_chip_map; static struct chip { unsigned int id; @@ -313,13 +314,14 @@ static inline unsigned int get_nominal_index(void) static void powernv_cpufreq_throttle_check(void *data) { unsigned int cpu = smp_processor_id(); + unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)]; unsigned long pmsr; int pmsr_pmax, i; pmsr = get_pmspr(SPRN_PMSR); for (i = 0; i < nr_chips; i++) - if (chips[i].id == cpu_to_chip_id(cpu)) + if (chips[i].id == chip_id) break; /* Check for Pmax Capping */ @@ -559,19 +561,29 @@ static int init_chip_info(void) unsigned int chip[256]; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + cpumask_t cpu_mask; + int ret = -ENOMEM; - for_each_possible_cpu(cpu) { + core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int), + GFP_KERNEL); + if (!core_to_chip_map) + goto out; + + cpumask_copy(&cpu_mask, cpu_possible_mask); + for_each_cpu(cpu, &cpu_mask) { unsigned int id = cpu_to_chip_id(cpu); if (prev_chip_id != id) { prev_chip_id = id; chip[nr_chips++] = id; } + core_to_chip_map[cpu_core_index_of_thread(cpu)] = id; + cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu)); } chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); if (!chips) - return -ENOMEM; + goto free_chip_map; for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; @@ -582,6 +594,10 @@ static int init_chip_info(void) } return 0; +free_chip_map: + kfree(core_to_chip_map); +out: + return ret; } static 
int __init powernv_cpufreq_init(void) @@ -616,6 +632,7 @@ static void __exit powernv_cpufreq_exit(void) opal_message_notifier_unregister(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); kfree(chips); + kfree(core_to_chip_map); cpufreq_unregister_driver(&powernv_cpufreq_driver); } module_exit(powernv_cpufreq_exit); From 0306e481d479a58eff17c27adf213fbb5822946b Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Wed, 3 Feb 2016 01:11:40 +0530 Subject: [PATCH 05/94] cpufreq: powernv/tracing: Add powernv_throttle tracepoint This patch adds the powernv_throttle tracepoint to trace the CPU frequency throttling event, which is used by the powernv-cpufreq driver in POWER8. Signed-off-by: Shilpasri G Bhat Reviewed-by: Gautham R. Shenoy Signed-off-by: Rafael J. Wysocki --- include/trace/events/power.h | 22 ++++++++++++++++++++++ kernel/trace/power-traces.c | 1 + 2 files changed, 23 insertions(+) diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 284244ebfe8d..19e50300ce7d 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -38,6 +38,28 @@ DEFINE_EVENT(cpu, cpu_idle, TP_ARGS(state, cpu_id) ); +TRACE_EVENT(powernv_throttle, + + TP_PROTO(int chip_id, const char *reason, int pmax), + + TP_ARGS(chip_id, reason, pmax), + + TP_STRUCT__entry( + __field(int, chip_id) + __string(reason, reason) + __field(int, pmax) + ), + + TP_fast_assign( + __entry->chip_id = chip_id; + __assign_str(reason, reason); + __entry->pmax = pmax; + ), + + TP_printk("Chip %d Pmax %d %s", __entry->chip_id, + __entry->pmax, __get_str(reason)) +); + TRACE_EVENT(pstate_sample, TP_PROTO(u32 core_busy, diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index eb4220a132ec..81b87451c0ea 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -15,4 +15,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); +EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle); From 
c89f2682a39192433c296bf97b834fd2815a758b Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Wed, 3 Feb 2016 01:11:41 +0530 Subject: [PATCH 06/94] cpufreq: powernv: Replace pr_info with trace print for throttle event Currently we use printk message to notify the throttle event. But this can flood the console if the cpu is throttled frequently. So replace the printk with the tracepoint to notify the throttle event. And also events like throttle below nominal frequency and OCC_RESET are reduced to pr_warn/pr_warn_once as pointed by MFG to not mark them as critical messages. This patch adds 'throttle_reason' to struct chip to store the throttle reason. Signed-off-by: Shilpasri G Bhat Reviewed-by: Gautham R. Shenoy Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 75 +++++++++++++++---------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index c670314053af..1bbc10a54c59 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -45,12 +46,22 @@ static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static bool rebooting, throttled, occ_reset; static unsigned int *core_to_chip_map; +static const char * const throttle_reason[] = { + "No throttling", + "Power Cap", + "Processor Over Temperature", + "Power Supply Failure", + "Over Current", + "OCC Reset" +}; + static struct chip { unsigned int id; bool throttled; + bool restore; + u8 throttle_reason; cpumask_t mask; struct work_struct throttle; - bool restore; } *chips; static int nr_chips; @@ -331,17 +342,17 @@ static void powernv_cpufreq_throttle_check(void *data) goto next; chips[i].throttled = true; if (pmsr_pmax < powernv_pstate_info.nominal) - pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", - cpu, chips[i].id, pmsr_pmax, - 
powernv_pstate_info.nominal); - else - pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", - cpu, chips[i].id, pmsr_pmax, - powernv_pstate_info.max); + pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + cpu, chips[i].id, pmsr_pmax, + powernv_pstate_info.nominal); + trace_powernv_throttle(chips[i].id, + throttle_reason[chips[i].throttle_reason], + pmsr_pmax); } else if (chips[i].throttled) { chips[i].throttled = false; - pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, - chips[i].id, pmsr_pmax); + trace_powernv_throttle(chips[i].id, + throttle_reason[chips[i].throttle_reason], + pmsr_pmax); } /* Check if Psafe_mode_active is set in PMSR. */ @@ -359,7 +370,7 @@ next: if (throttled) { pr_info("PMSR = %16lx\n", pmsr); - pr_crit("CPU Frequency could be throttled\n"); + pr_warn("CPU Frequency could be throttled\n"); } } @@ -452,15 +463,6 @@ out: put_online_cpus(); } -static char throttle_reason[][30] = { - "No throttling", - "Power Cap", - "Processor Over Temperature", - "Power Supply Failure", - "Over Current", - "OCC Reset" - }; - static int powernv_cpufreq_occ_msg(struct notifier_block *nb, unsigned long msg_type, void *_msg) { @@ -486,7 +488,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, */ if (!throttled) { throttled = true; - pr_crit("CPU frequency is throttled for duration\n"); + pr_warn("CPU frequency is throttled for duration\n"); } break; @@ -510,23 +512,18 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, return 0; } - if (omsg.throttle_status && - omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) - pr_info("OCC: Chip %u Pmax reduced due to %s\n", - (unsigned int)omsg.chip, - throttle_reason[omsg.throttle_status]); - else if (!omsg.throttle_status) - pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, - throttle_reason[omsg.throttle_status]); - else - return 0; - for (i = 0; i < nr_chips; i++) - if (chips[i].id == omsg.chip) { - if 
(!omsg.throttle_status) - chips[i].restore = true; - schedule_work(&chips[i].throttle); - } + if (chips[i].id == omsg.chip) + break; + + if (omsg.throttle_status >= 0 && + omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) + chips[i].throttle_reason = omsg.throttle_status; + + if (!omsg.throttle_status) + chips[i].restore = true; + + schedule_work(&chips[i].throttle); } return 0; } @@ -581,16 +578,14 @@ static int init_chip_info(void) cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu)); } - chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); + chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); if (!chips) goto free_chip_map; for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; - chips[i].throttled = false; cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); - chips[i].restore = false; } return 0; From 896d6a4c0f41a93809b83f9e58aad73874a89d99 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:40 +0530 Subject: [PATCH 07/94] cpufreq: dt: Convert few pr_debug/err() calls to dev_dbg/err() We have the device structure available now, lets use it for better print messages. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 0ca74d070058..ace0168274d4 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -246,7 +246,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) */ ret = dev_pm_opp_get_opp_count(cpu_dev); if (ret <= 0) { - pr_debug("OPP table is not ready, deferring probe\n"); + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); ret = -EPROBE_DEFER; goto out_free_opp; } @@ -325,7 +325,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { - pr_err("failed to init cpufreq table: %d\n", ret); + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); goto out_free_priv; } From 457e99e60a8f5a40b7da204c0bfc8a86ad2161b9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:41 +0530 Subject: [PATCH 08/94] cpufreq: dt: Rename 'need_update' to 'opp_v1' That's the real purpose of this field, i.e. to take special care of old OPP V1 bindings. Let's name it accordingly, so that it can be used elsewhere. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index ace0168274d4..0047d20803db 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -199,7 +199,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) struct dev_pm_opp *suspend_opp; unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; - bool need_update = false; + bool opp_v1 = false; int ret; ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); @@ -223,7 +223,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * finding shared-OPPs for backward compatibility. */ if (ret == -ENOENT) - need_update = true; + opp_v1 = true; else goto out_node_put; } @@ -251,7 +251,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_free_opp; } - if (need_update) { + if (opp_v1) { struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data(); if (!pd || !pd->independent_clocks) From 391d9aef8145204e0a5d67be3bd1fc45c5396dae Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:42 +0530 Subject: [PATCH 09/94] cpufreq: dt: OPP layers handles clock-latency for V1 bindings as well "clock-latency" is handled by OPP layer for all bindings and so there is no need to make special calls for V1 bindings. Use dev_pm_opp_get_max_clock_latency() for both the cases. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 0047d20803db..4c9f8a828f6f 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -265,10 +265,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret) dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); - - of_property_read_u32(np, "clock-latency", &transition_latency); - } else { - transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); } priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -279,6 +275,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); + transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); if (!transition_latency) transition_latency = CPUFREQ_ETERNAL; From 050794aaebbb9f2c2c50b340b6998273e7c64189 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:43 +0530 Subject: [PATCH 10/94] cpufreq: dt: Pass regulator name to the OPP core OPP core can handle the regulators by itself, but it needs to know the name of the regulator to fetch. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 4c9f8a828f6f..2af75f8088bb 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -34,6 +34,7 @@ struct private_data { struct regulator *cpu_reg; struct thermal_cooling_device *cdev; unsigned int voltage_tolerance; /* in percentage */ + const char *reg_name; }; static struct freq_attr *cpufreq_dt_attr[] = { @@ -119,6 +120,30 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) return ret; } +/* + * An earlier version of opp-v1 bindings used to name the regulator + * "cpu0-supply", we still need to handle that for backwards compatibility. + */ +static const char *find_supply_name(struct device *dev, struct device_node *np) +{ + struct property *pp; + int cpu = dev->id; + + /* Try "cpu0" for older DTs */ + if (!cpu) { + pp = of_find_property(np, "cpu0-supply", NULL); + if (pp) + return "cpu0"; + } + + pp = of_find_property(np, "cpu-supply", NULL); + if (pp) + return "cpu"; + + dev_dbg(dev, "no regulator for cpu%d\n", cpu); + return NULL; +} + static int allocate_resources(int cpu, struct device **cdev, struct regulator **creg, struct clk **cclk) { @@ -200,6 +225,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; bool opp_v1 = false; + const char *name; int ret; ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); @@ -228,6 +254,20 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_node_put; } + /* + * OPP layer will be taking care of regulators now, but it needs to know + * the name of the regulator first. 
+ */ + name = find_supply_name(cpu_dev, np); + if (name) { + ret = dev_pm_opp_set_regulator(cpu_dev, name); + if (ret) { + dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", + policy->cpu, ret); + goto out_node_put; + } + } + /* * Initialize OPP tables for all policy->cpus. They will be shared by * all CPUs which have marked their CPUs shared with OPP bindings. @@ -273,6 +313,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_free_opp; } + priv->reg_name = name; of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); @@ -366,6 +407,8 @@ out_free_priv: kfree(priv); out_free_opp: dev_pm_opp_of_cpumask_remove_table(policy->cpus); + if (name) + dev_pm_opp_put_regulator(cpu_dev); out_node_put: of_node_put(np); out_put_reg_clk: @@ -383,6 +426,9 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + if (priv->reg_name) + dev_pm_opp_put_regulator(priv->cpu_dev); + clk_put(policy->clk); if (!IS_ERR(priv->cpu_reg)) regulator_put(priv->cpu_reg); From 6def6ea75e6dea45f01a16ae3cfb5b5ce48dd5e9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:44 +0530 Subject: [PATCH 11/94] cpufreq: dt: Unsupported OPPs are already disabled The core already have a valid regulator set for the device opp and the unsupported OPPs are already disabled by the core. There is no need to repeat that in the user drivers, get rid of it. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 2af75f8088bb..c3fe89461ff4 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -349,8 +349,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) min_uV = opp_uV; if (opp_uV > max_uV) max_uV = opp_uV; - } else { - dev_pm_opp_disable(cpu_dev, opp_freq); } opp_freq++; From 755b888ff098c9f762717a9fbda7e05b16619069 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:45 +0530 Subject: [PATCH 12/94] cpufreq: dt: Reuse dev_pm_opp_get_max_transition_latency() OPP layer has all the information now to calculate transition latency (clock_latency + voltage_latency). Lets reuse the OPP layer helper dev_pm_opp_get_max_transition_latency() instead of open coding the same in cpufreq-dt driver. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 48 +++--------------------------------- 1 file changed, 4 insertions(+), 44 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index c3fe89461ff4..6f80ce56b4ec 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -222,7 +222,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) struct regulator *cpu_reg; struct clk *cpu_clk; struct dev_pm_opp *suspend_opp; - unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; bool opp_v1 = false; const char *name; @@ -316,49 +315,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) priv->reg_name = name; of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); - transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); - if (!transition_latency) - transition_latency = CPUFREQ_ETERNAL; - - if (!IS_ERR(cpu_reg)) { - unsigned long opp_freq = 0; - - /* - * Disable any OPPs where the connected regulator isn't able to - * 
provide the specified voltage and record minimum and maximum - * voltage levels. - */ - while (1) { - struct dev_pm_opp *opp; - unsigned long opp_uV, tol_uV; - - rcu_read_lock(); - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq); - if (IS_ERR(opp)) { - rcu_read_unlock(); - break; - } - opp_uV = dev_pm_opp_get_voltage(opp); - rcu_read_unlock(); - - tol_uV = opp_uV * priv->voltage_tolerance / 100; - if (regulator_is_supported_voltage(cpu_reg, - opp_uV - tol_uV, - opp_uV + tol_uV)) { - if (opp_uV < min_uV) - min_uV = opp_uV; - if (opp_uV > max_uV) - max_uV = opp_uV; - } - - opp_freq++; - } - - ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV); - if (ret > 0) - transition_latency += ret * 1000; - } - ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); @@ -393,6 +349,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; } + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) + transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = transition_latency; of_node_put(np); From 78c3ba5df96c875b1668e1cd3ee0a69e62454f32 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:46 +0530 Subject: [PATCH 13/94] cpufreq: dt: Use dev_pm_opp_set_rate() to switch frequency OPP core supports frequency/voltage changes based on the target frequency now, use that instead of open coding the same in cpufreq-dt driver. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 73 +----------------------------------- 1 file changed, 2 insertions(+), 71 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 6f80ce56b4ec..150a172c7d0a 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -45,79 +45,10 @@ static struct freq_attr *cpufreq_dt_attr[] = { static int set_target(struct cpufreq_policy *policy, unsigned int index) { - struct dev_pm_opp *opp; - struct cpufreq_frequency_table *freq_table = policy->freq_table; - struct clk *cpu_clk = policy->clk; struct private_data *priv = policy->driver_data; - struct device *cpu_dev = priv->cpu_dev; - struct regulator *cpu_reg = priv->cpu_reg; - unsigned long volt = 0, tol = 0; - int volt_old = 0; - unsigned int old_freq, new_freq; - long freq_Hz, freq_exact; - int ret; - freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000); - if (freq_Hz <= 0) - freq_Hz = freq_table[index].frequency * 1000; - - freq_exact = freq_Hz; - new_freq = freq_Hz / 1000; - old_freq = clk_get_rate(cpu_clk) / 1000; - - if (!IS_ERR(cpu_reg)) { - unsigned long opp_freq; - - rcu_read_lock(); - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz); - if (IS_ERR(opp)) { - rcu_read_unlock(); - dev_err(cpu_dev, "failed to find OPP for %ld\n", - freq_Hz); - return PTR_ERR(opp); - } - volt = dev_pm_opp_get_voltage(opp); - opp_freq = dev_pm_opp_get_freq(opp); - rcu_read_unlock(); - tol = volt * priv->voltage_tolerance / 100; - volt_old = regulator_get_voltage(cpu_reg); - dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n", - opp_freq / 1000, volt); - } - - dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n", - old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, - new_freq / 1000, volt ? volt / 1000 : -1); - - /* scaling up? 
scale voltage before frequency */ - if (!IS_ERR(cpu_reg) && new_freq > old_freq) { - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); - if (ret) { - dev_err(cpu_dev, "failed to scale voltage up: %d\n", - ret); - return ret; - } - } - - ret = clk_set_rate(cpu_clk, freq_exact); - if (ret) { - dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - if (!IS_ERR(cpu_reg) && volt_old > 0) - regulator_set_voltage_tol(cpu_reg, volt_old, tol); - return ret; - } - - /* scaling down? scale voltage after frequency */ - if (!IS_ERR(cpu_reg) && new_freq < old_freq) { - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); - if (ret) { - dev_err(cpu_dev, "failed to scale voltage down: %d\n", - ret); - clk_set_rate(cpu_clk, old_freq * 1000); - } - } - - return ret; + return dev_pm_opp_set_rate(priv->cpu_dev, + policy->freq_table[index].frequency * 1000); } /* From df2c8ec28e73d47392b8cb24828c15c54819da41 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:47 +0530 Subject: [PATCH 14/94] cpufreq: dt: No need to fetch voltage-tolerance It's already done by the core and we don't need to get it anymore. And so, we don't need to get the of node in cpufreq_init() anymore; move that to find_supply_name() instead. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 46 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 150a172c7d0a..bbafd7b63d1a 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -33,7 +33,6 @@ struct private_data { struct device *cpu_dev; struct regulator *cpu_reg; struct thermal_cooling_device *cdev; - unsigned int voltage_tolerance; /* in percentage */ const char *reg_name; }; @@ -55,24 +54,38 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) * An earlier version of opp-v1 bindings used to name the regulator * "cpu0-supply", we still need to handle that for backwards compatibility. */ -static const char *find_supply_name(struct device *dev, struct device_node *np) +static const char *find_supply_name(struct device *dev) { + struct device_node *np; struct property *pp; int cpu = dev->id; + const char *name = NULL; + + np = of_node_get(dev->of_node); + + /* This must be valid for sure */ + if (WARN_ON(!np)) + return NULL; /* Try "cpu0" for older DTs */ if (!cpu) { pp = of_find_property(np, "cpu0-supply", NULL); - if (pp) - return "cpu0"; + if (pp) { + name = "cpu0"; + goto node_put; + } } pp = of_find_property(np, "cpu-supply", NULL); - if (pp) - return "cpu"; + if (pp) { + name = "cpu"; + goto node_put; + } dev_dbg(dev, "no regulator for cpu%d\n", cpu); - return NULL; +node_put: + of_node_put(np); + return name; } static int allocate_resources(int cpu, struct device **cdev, @@ -147,7 +160,6 @@ try_again: static int cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; - struct device_node *np; struct private_data *priv; struct device *cpu_dev; struct regulator *cpu_reg; @@ -164,13 +176,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) return ret; } - np = of_node_get(cpu_dev->of_node); - if (!np) { - dev_err(cpu_dev, "failed to find cpu%d node\n", 
policy->cpu); - ret = -ENOENT; - goto out_put_reg_clk; - } - /* Get OPP-sharing information from "operating-points-v2" bindings */ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus); if (ret) { @@ -181,20 +186,20 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret == -ENOENT) opp_v1 = true; else - goto out_node_put; + goto out_put_reg_clk; } /* * OPP layer will be taking care of regulators now, but it needs to know * the name of the regulator first. */ - name = find_supply_name(cpu_dev, np); + name = find_supply_name(cpu_dev); if (name) { ret = dev_pm_opp_set_regulator(cpu_dev, name); if (ret) { dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", policy->cpu, ret); - goto out_node_put; + goto out_put_reg_clk; } } @@ -244,7 +249,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) } priv->reg_name = name; - of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { @@ -286,8 +290,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; - of_node_put(np); - return 0; out_free_cpufreq_table: @@ -298,8 +300,6 @@ out_free_opp: dev_pm_opp_of_cpumask_remove_table(policy->cpus); if (name) dev_pm_opp_put_regulator(cpu_dev); -out_node_put: - of_node_put(np); out_put_reg_clk: clk_put(cpu_clk); if (!IS_ERR(cpu_reg)) From dd02a3d920083b6cb0ee4f0eaf2c599b740bf5fe Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:48 +0530 Subject: [PATCH 15/94] cpufreq: dt: No need to allocate resources anymore OPP layer manages it now and cpufreq-dt driver doesn't need it. But, we still need to check for availability of resources for deferred probing. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 116 ++++++++++++++--------------------- 1 file changed, 47 insertions(+), 69 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bbafd7b63d1a..f951f911786e 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -31,7 +31,6 @@ struct private_data { struct device *cpu_dev; - struct regulator *cpu_reg; struct thermal_cooling_device *cdev; const char *reg_name; }; @@ -88,73 +87,59 @@ node_put: return name; } -static int allocate_resources(int cpu, struct device **cdev, - struct regulator **creg, struct clk **cclk) +static int resources_available(void) { struct device *cpu_dev; struct regulator *cpu_reg; struct clk *cpu_clk; int ret = 0; - char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg; + const char *name; - cpu_dev = get_cpu_device(cpu); + cpu_dev = get_cpu_device(0); if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", cpu); + pr_err("failed to get cpu0 device\n"); return -ENODEV; } - /* Try "cpu0" for older DTs */ - if (!cpu) - reg = reg_cpu0; - else - reg = reg_cpu; + cpu_clk = clk_get(cpu_dev, NULL); + ret = PTR_ERR_OR_ZERO(cpu_clk); + if (ret) { + /* + * If cpu's clk node is present, but clock is not yet + * registered, we should try defering probe. + */ + if (ret == -EPROBE_DEFER) + dev_dbg(cpu_dev, "clock not ready, retry\n"); + else + dev_err(cpu_dev, "failed to get clock: %d\n", ret); -try_again: - cpu_reg = regulator_get_optional(cpu_dev, reg); + return ret; + } + + clk_put(cpu_clk); + + name = find_supply_name(cpu_dev); + /* Platform doesn't require regulator */ + if (!name) + return 0; + + cpu_reg = regulator_get_optional(cpu_dev, name); ret = PTR_ERR_OR_ZERO(cpu_reg); if (ret) { /* * If cpu's regulator supply node is present, but regulator is * not yet registered, we should try defering probe. 
*/ - if (ret == -EPROBE_DEFER) { - dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n", - cpu); - return ret; - } - - /* Try with "cpu-supply" */ - if (reg == reg_cpu0) { - reg = reg_cpu; - goto try_again; - } - - dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret); - } - - cpu_clk = clk_get(cpu_dev, NULL); - ret = PTR_ERR_OR_ZERO(cpu_clk); - if (ret) { - /* put regulator */ - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); - - /* - * If cpu's clk node is present, but clock is not yet - * registered, we should try defering probe. - */ if (ret == -EPROBE_DEFER) - dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu); + dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n"); else - dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu, - ret); - } else { - *cdev = cpu_dev; - *creg = cpu_reg; - *cclk = cpu_clk; + dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret); + + return ret; } - return ret; + regulator_put(cpu_reg); + return 0; } static int cpufreq_init(struct cpufreq_policy *policy) @@ -162,7 +147,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) struct cpufreq_frequency_table *freq_table; struct private_data *priv; struct device *cpu_dev; - struct regulator *cpu_reg; struct clk *cpu_clk; struct dev_pm_opp *suspend_opp; unsigned int transition_latency; @@ -170,9 +154,16 @@ static int cpufreq_init(struct cpufreq_policy *policy) const char *name; int ret; - ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); - if (ret) { - pr_err("%s: Failed to allocate resources: %d\n", __func__, ret); + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", policy->cpu); + return -ENODEV; + } + + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + ret = PTR_ERR(cpu_clk); + dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret); return ret; } @@ -186,7 +177,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret == -ENOENT) opp_v1 = true; else - goto out_put_reg_clk; + 
goto out_put_clk; } /* @@ -199,7 +190,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret) { dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", policy->cpu, ret); - goto out_put_reg_clk; + goto out_put_clk; } } @@ -257,9 +248,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) } priv->cpu_dev = cpu_dev; - priv->cpu_reg = cpu_reg; policy->driver_data = priv; - policy->clk = cpu_clk; rcu_read_lock(); @@ -300,10 +289,8 @@ out_free_opp: dev_pm_opp_of_cpumask_remove_table(policy->cpus); if (name) dev_pm_opp_put_regulator(cpu_dev); -out_put_reg_clk: +out_put_clk: clk_put(cpu_clk); - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); return ret; } @@ -319,8 +306,6 @@ static int cpufreq_exit(struct cpufreq_policy *policy) dev_pm_opp_put_regulator(priv->cpu_dev); clk_put(policy->clk); - if (!IS_ERR(priv->cpu_reg)) - regulator_put(priv->cpu_reg); kfree(priv); return 0; @@ -373,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = { static int dt_cpufreq_probe(struct platform_device *pdev) { - struct device *cpu_dev; - struct regulator *cpu_reg; - struct clk *cpu_clk; int ret; /* @@ -385,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev) * * FIXME: Is checking this only for CPU0 sufficient ? */ - ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk); + ret = resources_available(); if (ret) return ret; - clk_put(cpu_clk); - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); - dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) - dev_err(cpu_dev, "failed register driver: %d\n", ret); + dev_err(&pdev->dev, "failed register driver: %d\n", ret); return ret; } From 6541aef01a9b308f280d3f2d26a46858e6dbef6a Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 12 Feb 2016 23:56:21 +0100 Subject: [PATCH 16/94] cpufreq: Drop unnecessary checks from show() and store() The show() and store() routines in the cpufreq core don't need to check if the struct freq_attr they want to use really provides the callbacks they need as expected (if that's not the case, it means a bug in the code anyway), so change them to avoid doing that. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 34b17447e0d1..78a262f3d9ab 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -818,12 +818,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) ssize_t ret; down_read(&policy->rwsem); - - if (fattr->show) - ret = fattr->show(policy, buf); - else - ret = -EIO; - + ret = fattr->show(policy, buf); up_read(&policy->rwsem); return ret; @@ -838,18 +833,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, get_online_cpus(); - if (!cpu_online(policy->cpu)) - goto unlock; - - down_write(&policy->rwsem); - - if (fattr->store) + if (cpu_online(policy->cpu)) { + down_write(&policy->rwsem); ret = fattr->store(policy, buf, count); - else - ret = -EIO; + up_write(&policy->rwsem); + } - up_write(&policy->rwsem); -unlock: put_online_cpus(); return ret; From 5bc8ac0f68284e3c05e0465afb59c62c996d9d8a Mon Sep 17 00:00:00 2001 From: Felipe Franciosi Date: Thu, 18 Feb 2016 14:51:46 +0000 Subject: [PATCH 17/94] Documentation: cpufreq: intel_pstate: fix typo This just swaps a colon for a quote in the intel_pstate documentation. Signed-off-by: Felipe Franciosi Signed-off-by: Rafael J. 
Wysocki --- Documentation/cpu-freq/intel-pstate.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index f7b12c071d53..e6bd1e6512a5 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -25,7 +25,7 @@ callback, so cpufreq core can't request a transition to a specific frequency. The driver provides minimum and maximum frequency limits and callbacks to set a policy. The policy in cpufreq sysfs is referred to as the "scaling governor". The cpufreq core can request the driver to operate in any of the two policies: -"performance: and "powersave". The driver decides which frequency to use based +"performance" and "powersave". The driver decides which frequency to use based on the above policy selection considering minimum and maximum frequency limits. The Intel P-State driver falls under the latter category, which implements the From 63af4055726a56e04caf354aac58478d2af07ce8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 20 Feb 2016 21:50:01 -0600 Subject: [PATCH 18/94] cpufreq: fix comment about return value of cpufreq_register_driver() The comment has been incorrect since commit 4dea5806d332 ("cpufreq: return EEXIST instead of EBUSY for second registering"). Signed-off-by: Eric Biggers Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 78a262f3d9ab..d84aff1593e4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2378,7 +2378,7 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled); * submitted by the CPU Frequency driver. * * Registers a CPU Frequency driver to this core code. 
This code - * returns zero on success, -EBUSY when another driver got here first + * returns zero on success, -EEXIST when another driver got here first * (and isn't unregistered in the meantime). * */ From fd7dc7e6b6521453d1170dc1ed50320024b5ba9d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 21 Feb 2016 12:53:12 -0600 Subject: [PATCH 19/94] cpufreq: simplify for_each_suitable_policy() macro Signed-off-by: Eric Biggers Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 44 +++------------------------------------ 1 file changed, 3 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d84aff1593e4..9be654375657 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -38,48 +38,10 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy) return cpumask_empty(policy->cpus); } -static bool suitable_policy(struct cpufreq_policy *policy, bool active) -{ - return active == !policy_is_inactive(policy); -} - -/* Finds Next Acive/Inactive policy */ -static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy, - bool active) -{ - do { - /* No more policies in the list */ - if (list_is_last(&policy->policy_list, &cpufreq_policy_list)) - return NULL; - - policy = list_next_entry(policy, policy_list); - } while (!suitable_policy(policy, active)); - - return policy; -} - -static struct cpufreq_policy *first_policy(bool active) -{ - struct cpufreq_policy *policy; - - /* No policies in the list */ - if (list_empty(&cpufreq_policy_list)) - return NULL; - - policy = list_first_entry(&cpufreq_policy_list, typeof(*policy), - policy_list); - - if (!suitable_policy(policy, active)) - policy = next_policy(policy, active); - - return policy; -} - /* Macros to iterate over CPU policies */ -#define for_each_suitable_policy(__policy, __active) \ - for (__policy = first_policy(__active); \ - __policy; \ - __policy = next_policy(__policy, __active)) +#define 
for_each_suitable_policy(__policy, __active) \ + list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \ + if ((__active) == !policy_is_inactive(__policy)) #define for_each_active_policy(__policy) \ for_each_suitable_policy(__policy, true) From 41cfd64cf49fc84837341732a142f3d4cdc1e83a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 10:27:46 +0530 Subject: [PATCH 20/94] intel_pstate: Update frequencies of policy->cpus only from ->set_policy() The intel-pstate driver is using intel_pstate_hwp_set() from two separate paths, i.e. ->set_policy() callback and sysfs update path for the files present in /sys/devices/system/cpu/intel_pstate/ directory. While an update to the sysfs path applies to all the CPUs being managed by the driver (which essentially means all the online CPUs), the update via the ->set_policy() callback applies to a smaller group of CPUs managed by the policy for which ->set_policy() is called. And so, intel_pstate_hwp_set() should update frequencies of only the CPUs that are part of policy->cpus mask, while it is called from ->set_policy() callback. In order to do that, add a parameter (cpumask) to intel_pstate_hwp_set() and apply the frequency changes only to the concerned CPUs. For ->set_policy() path, we are only concerned about policy->cpus, and so policy->rwsem lock taken by the core prior to calling ->set_policy() is enough to take care of any races. The larger lock acquired by get_online_cpus() is required only for the updates to sysfs files. Add another routine, intel_pstate_hwp_set_online_cpus(), and call it from the sysfs update paths. This also fixes a lockdep reported recently, where policy->rwsem and get_online_cpus() could have been acquired in any order causing an ABBA deadlock. The sequence of events leading to that was: intel_pstate_init(...) ...cpufreq_online(...) down_write(&policy->rwsem); // Locks policy->rwsem ... 
cpufreq_init_policy(policy); ...intel_pstate_hwp_set(); get_online_cpus(); // Temporarily locks cpu_hotplug.lock ... up_write(&policy->rwsem); pm_suspend(...) ...disable_nonboot_cpus() _cpu_down() cpu_hotplug_begin(); // Locks cpu_hotplug.lock __cpu_notify(CPU_DOWN_PREPARE, ...); ...cpufreq_offline_prepare(); down_write(&policy->rwsem); // Locks policy->rwsem Reported-and-tested-by: Joonas Lahtinen Signed-off-by: Viresh Kumar Reviewed-by: Joonas Lahtinen Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cd83d477e32d..e85677653ef8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -286,7 +286,7 @@ static inline void update_turbo_state(void) cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } -static void intel_pstate_hwp_set(void) +static void intel_pstate_hwp_set(const struct cpumask *cpumask) { int min, hw_min, max, hw_max, cpu, range, adj_range; u64 value, cap; @@ -296,9 +296,7 @@ static void intel_pstate_hwp_set(void) hw_max = HWP_HIGHEST_PERF(cap); range = hw_max - hw_min; - get_online_cpus(); - - for_each_online_cpu(cpu) { + for_each_cpu(cpu, cpumask) { rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); adj_range = limits->min_perf_pct * range / 100; min = hw_min + adj_range; @@ -317,7 +315,12 @@ static void intel_pstate_hwp_set(void) value |= HWP_MAX_PERF(max); wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } +} +static void intel_pstate_hwp_set_online_cpus(void) +{ + get_online_cpus(); + intel_pstate_hwp_set(cpu_online_mask); put_online_cpus(); } @@ -439,7 +442,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -465,7 +468,7 @@ static ssize_t 
store_max_perf_pct(struct kobject *a, struct attribute *b, int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -490,7 +493,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -1141,7 +1144,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) pr_debug("intel_pstate: set performance\n"); limits = &performance_limits; if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set(policy->cpus); return 0; } @@ -1173,7 +1176,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set(policy->cpus); return 0; } From 6019d23a73c79fe4b0f531b0e968b14e1d6f50f1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Feb 2016 00:03:01 +0100 Subject: [PATCH 21/94] cpufreq: Rearrange __cpufreq_driver_target() Drop a pointless label at a return statement from __cpufreq_driver_target() and rearrange that function to reduce the indentation level. No intentional functional changes. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 45 +++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9be654375657..bdf258ea0977 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1805,7 +1805,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation) { unsigned int old_target_freq = target_freq; - int retval = -EINVAL; + struct cpufreq_frequency_table *freq_table; + int index, retval; if (cpufreq_disabled()) return -ENODEV; @@ -1832,34 +1833,28 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, policy->restore_freq = policy->cur; if (cpufreq_driver->target) - retval = cpufreq_driver->target(policy, target_freq, relation); - else if (cpufreq_driver->target_index) { - struct cpufreq_frequency_table *freq_table; - int index; + return cpufreq_driver->target(policy, target_freq, relation); - freq_table = cpufreq_frequency_get_table(policy->cpu); - if (unlikely(!freq_table)) { - pr_err("%s: Unable to find freq_table\n", __func__); - goto out; - } + if (!cpufreq_driver->target_index) + return -EINVAL; - retval = cpufreq_frequency_table_target(policy, freq_table, - target_freq, relation, &index); - if (unlikely(retval)) { - pr_err("%s: Unable to find matching freq\n", __func__); - goto out; - } - - if (freq_table[index].frequency == policy->cur) { - retval = 0; - goto out; - } - - retval = __target_index(policy, freq_table, index); + freq_table = cpufreq_frequency_get_table(policy->cpu); + if (unlikely(!freq_table)) { + pr_err("%s: Unable to find freq_table\n", __func__); + return -EINVAL; } -out: - return retval; + retval = cpufreq_frequency_table_target(policy, freq_table, target_freq, + relation, &index); + if (unlikely(retval)) { + pr_err("%s: Unable to find matching freq\n", __func__); + return retval; + } + + if (freq_table[index].frequency == policy->cur) + return 0; + + return 
__target_index(policy, freq_table, index); } EXPORT_SYMBOL_GPL(__cpufreq_driver_target); From 9a909a142f02bfa5fd3e203a564abc82fd0240c3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Feb 2016 00:03:58 +0100 Subject: [PATCH 22/94] cpufreq: acpi-cpufreq: Drop pointless label from acpi_cpufreq_target() The "out" label at the final return statement in acpi_cpufreq_target() is totally pointless, so drop it and modify the code to return the right values immediately instead of jumping to it. No functional changes. Signed-off-by: Rafael J. Wysocki Reviewed-by: Srinivas Pandruvada Acked-by: Viresh Kumar --- drivers/cpufreq/acpi-cpufreq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 51eef87bbc37..17a8d0c58abb 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -434,7 +434,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } else { pr_debug("Already at target state (P%d)\n", next_perf_state); - goto out; + return 0; } } @@ -456,8 +456,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, cmd.val = (u32) perf->states[next_perf_state].control; break; default: - result = -ENODEV; - goto out; + return -ENODEV; } /* cpufreq holds the hotplug lock, so we are safe from here on */ @@ -480,7 +479,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (!result) perf->state = next_perf_state; -out: return result; } From 34b0870515aaac6b7ea1ffdc370516b0a8024c82 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Feb 2016 00:22:57 +0100 Subject: [PATCH 23/94] cpufreq: Simplify the cpufreq_for_each_valid_entry() That macro uses an internal static inline function that is first totally unnecessary and second hard to read, so simplify it and get rid of that monster. No functional changes. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbf..4064cfcfbffd 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -504,16 +504,6 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, } #endif -static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) -{ - while ((*pos)->frequency != CPUFREQ_TABLE_END) - if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID) - return true; - else - (*pos)++; - return false; -} - /* * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table * @pos: the cpufreq_frequency_table * to use as a loop cursor. @@ -530,8 +520,11 @@ static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) * @table: the cpufreq_frequency_table * to iterate over. */ -#define cpufreq_for_each_valid_entry(pos, table) \ - for (pos = table; cpufreq_next_valid(&pos); pos++) +#define cpufreq_for_each_valid_entry(pos, table) \ + for (pos = table; pos->frequency != CPUFREQ_TABLE_END; pos++) \ + if (pos->frequency == CPUFREQ_ENTRY_INVALID) \ + continue; \ + else int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); From 7791e4aa59ad724e0b4c8b4dea547a5735108972 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 25 Feb 2016 15:09:19 -0800 Subject: [PATCH 24/94] cpufreq: intel_pstate: Enable HWP by default If the processor supports HWP, enable it by default without checking for the cpu model. This allows HWP to be enabled on all supported processors without driver changes. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e85677653ef8..ebe8506ba285 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1400,6 +1400,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } static inline bool intel_pstate_has_acpi_ppc(void) { return false; } #endif /* CONFIG_ACPI */ +static const struct x86_cpu_id hwp_support_ids[] __initconst = { + { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, + {} +}; + static int __init intel_pstate_init(void) { int cpu, rc = 0; @@ -1409,17 +1414,16 @@ static int __init intel_pstate_init(void) if (no_load) return -ENODEV; + if (x86_match_cpu(hwp_support_ids) && !no_hwp) { + copy_cpu_funcs(&core_params.funcs); + hwp_active++; + goto hwp_cpu_matched; + } + id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) return -ENODEV; - /* - * The Intel pstate driver will be ignored if the platform - * firmware has its own power management modes. - */ - if (intel_pstate_platform_pwr_mgmt_exists()) - return -ENODEV; - cpu_def = (struct cpu_defaults *)id->driver_data; copy_pid_params(&cpu_def->pid_policy); @@ -1428,17 +1432,20 @@ static int __init intel_pstate_init(void) if (intel_pstate_msrs_not_valid()) return -ENODEV; +hwp_cpu_matched: + /* + * The Intel pstate driver will be ignored if the platform + * firmware has its own power management modes. 
+ */ + if (intel_pstate_platform_pwr_mgmt_exists()) + return -ENODEV; + pr_info("Intel P-state driver initializing.\n"); all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) return -ENOMEM; - if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { - pr_info("intel_pstate: HWP enabled\n"); - hwp_active++; - } - if (!hwp_active && hwp_only) goto out; @@ -1449,6 +1456,9 @@ static int __init intel_pstate_init(void) intel_pstate_debug_expose_params(); intel_pstate_sysfs_expose_params(); + if (hwp_active) + pr_info("intel_pstate: HWP enabled\n"); + return rc; out: get_online_cpus(); From f05c966585ec5295516f946505661ec2f9966e5a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 25 Feb 2016 15:09:31 -0800 Subject: [PATCH 25/94] cpufreq: intel_pstate: disable HWP notifications Disable HWP Interrupt notification before enabling HWP. Since we don't have HWP interrupt handling for possible performance interrupts, there is not much use of enabling HWP interrupts. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ebe8506ba285..937667065d31 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -534,6 +534,9 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { + /* First disable HWP notification interrupt as we don't process them */ + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } From c5e29ea7ac144113f60dc540d7bb00cea5056b10 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Fri, 26 Feb 2016 16:06:51 +0530 Subject: [PATCH 26/94] cpufreq: powernv: Fix bugs in powernv_cpufreq_{init/exit} Unregister the notifiers if cpufreq_driver_register() fails in powernv_cpufreq_init(). 
Re-arrange the unregistration and cleanup routines in powernv_cpufreq_exit() to free all the resources after the driver has unregistered. Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 40 ++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 1bbc10a54c59..50bf12033bbc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -595,6 +595,19 @@ out: return ret; } +static inline void clean_chip_info(void) +{ + kfree(chips); + kfree(core_to_chip_map); +} + +static inline void unregister_all_notifiers(void) +{ + opal_message_notifier_unregister(OPAL_MSG_OCC, + &powernv_cpufreq_opal_nb); + unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); +} + static int __init powernv_cpufreq_init(void) { int rc = 0; @@ -605,30 +618,35 @@ static int __init powernv_cpufreq_init(void) /* Discover pstates from device tree and init */ rc = init_powernv_pstates(); - if (rc) { - pr_info("powernv-cpufreq disabled. System does not support PState control\n"); - return rc; - } + if (rc) + goto out; /* Populate chip info */ rc = init_chip_info(); if (rc) - return rc; + goto out; register_reboot_notifier(&powernv_cpufreq_reboot_nb); opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); - return cpufreq_register_driver(&powernv_cpufreq_driver); + + rc = cpufreq_register_driver(&powernv_cpufreq_driver); + if (!rc) + return 0; + + pr_info("Failed to register the cpufreq driver (%d)\n", rc); + unregister_all_notifiers(); + clean_chip_info(); +out: + pr_info("Platform driver disabled. 
System does not support PState control\n"); return rc; } module_init(powernv_cpufreq_init); static void __exit powernv_cpufreq_exit(void) { - unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); - opal_message_notifier_unregister(OPAL_MSG_OCC, - &powernv_cpufreq_opal_nb); - kfree(chips); - kfree(core_to_chip_map); cpufreq_unregister_driver(&powernv_cpufreq_driver); + unregister_all_notifiers(); + clean_chip_info(); } module_exit(powernv_cpufreq_exit); From ed757a2c7bf7aa99d219b78349b4a0334851dc38 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 2 Mar 2016 03:05:22 +0100 Subject: [PATCH 27/94] cpufreq: acpi-cpufreq: Make read and write operations more efficient Setting a new CPU frequency and reading the current request value in the ACPI cpufreq driver each involve at least two switch statements (there's more if the policy is shared). One of them is present in drv_read/write() that prepares a command structure and the other happens in subsequent do_drv_read/write() when that structure is interpreted. However, all of those switches may be avoided by using function pointers. To that end, add two function pointers to struct acpi_cpufreq_data to represent read and write operations on the frequency register and set them up during policy initialization to point to the pair of routines suitable for the given processor (Intel/AMD MSR access or I/O port access). Then, use those pointers in do_drv_read/write() and modify drv_read/write() to prepare the command structure for them without any checks. Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 210 +++++++++++++++------------------ 1 file changed, 96 insertions(+), 114 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 17a8d0c58abb..59a7b380fbe2 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -70,6 +70,8 @@ struct acpi_cpufreq_data { unsigned int cpu_feature; unsigned int acpi_perf_cpu; cpumask_var_t freqdomain_cpus; + void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); + u32 (*cpu_freq_read)(struct acpi_pct_register *reg); }; /* acpi_perf_data is a pointer to percpu data. */ @@ -243,125 +245,119 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) } } -struct msr_addr { - u32 reg; -}; +u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) +{ + u32 val, dummy; -struct io_addr { - u16 port; - u8 bit_width; -}; + rdmsr(MSR_IA32_PERF_CTL, val, dummy); + return val; +} + +void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) +{ + u32 lo, hi; + + rdmsr(MSR_IA32_PERF_CTL, lo, hi); + lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE); + wrmsr(MSR_IA32_PERF_CTL, lo, hi); +} + +u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) +{ + u32 val, dummy; + + rdmsr(MSR_AMD_PERF_CTL, val, dummy); + return val; +} + +void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val) +{ + wrmsr(MSR_AMD_PERF_CTL, val, 0); +} + +u32 cpu_freq_read_io(struct acpi_pct_register *reg) +{ + u32 val; + + acpi_os_read_port(reg->address, &val, reg->bit_width); + return val; +} + +void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val) +{ + acpi_os_write_port(reg->address, val, reg->bit_width); +} struct drv_cmd { - unsigned int type; - const struct cpumask *mask; - union { - struct msr_addr msr; - struct io_addr io; - } addr; + struct acpi_pct_register *reg; u32 val; + union { + void (*write)(struct acpi_pct_register *reg, u32 val); + u32 (*read)(struct acpi_pct_register *reg); + } func; 
}; /* Called via smp_call_function_single(), on the target CPU */ static void do_drv_read(void *_cmd) { struct drv_cmd *cmd = _cmd; - u32 h; - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - case SYSTEM_AMD_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, cmd->val, h); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_read_port((acpi_io_address)cmd->addr.io.port, - &cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } + cmd->val = cmd->func.read(cmd->reg); +} + +static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask) +{ + struct acpi_processor_performance *perf = to_perf_data(data); + struct drv_cmd cmd = { + .reg = &perf->control_register, + .func.read = data->cpu_freq_read, + }; + int err; + + err = smp_call_function_any(mask, do_drv_read, &cmd, 1); + WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ + return cmd.val; } /* Called via smp_call_function_many(), on the target CPUs */ static void do_drv_write(void *_cmd) { struct drv_cmd *cmd = _cmd; - u32 lo, hi; - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, lo, hi); - lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); - wrmsr(cmd->addr.msr.reg, lo, hi); - break; - case SYSTEM_AMD_MSR_CAPABLE: - wrmsr(cmd->addr.msr.reg, cmd->val, 0); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_write_port((acpi_io_address)cmd->addr.io.port, - cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } + cmd->func.write(cmd->reg, cmd->val); } -static void drv_read(struct drv_cmd *cmd) -{ - int err; - cmd->val = 0; - - err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); - WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? 
*/ -} - -static void drv_write(struct drv_cmd *cmd) +static void drv_write(struct acpi_cpufreq_data *data, + const struct cpumask *mask, u32 val) { + struct acpi_processor_performance *perf = to_perf_data(data); + struct drv_cmd cmd = { + .reg = &perf->control_register, + .val = val, + .func.write = data->cpu_freq_write, + }; int this_cpu; this_cpu = get_cpu(); - if (cpumask_test_cpu(this_cpu, cmd->mask)) - do_drv_write(cmd); - smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); + if (cpumask_test_cpu(this_cpu, mask)) + do_drv_write(&cmd); + + smp_call_function_many(mask, do_drv_write, &cmd, 1); put_cpu(); } -static u32 -get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) +static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) { - struct acpi_processor_performance *perf; - struct drv_cmd cmd; + u32 val; if (unlikely(cpumask_empty(mask))) return 0; - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - break; - case SYSTEM_AMD_MSR_CAPABLE: - cmd.type = SYSTEM_AMD_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_AMD_PERF_CTL; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - perf = to_perf_data(data); - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - break; - default: - return 0; - } + val = drv_read(data, mask); - cmd.mask = mask; - drv_read(&cmd); + pr_debug("get_cur_val = %u\n", val); - pr_debug("get_cur_val = %u\n", cmd.val); - - return cmd.val; + return val; } static unsigned int get_cur_freq_on_cpu(unsigned int cpu) @@ -416,7 +412,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, { struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; - struct drv_cmd cmd; + const struct cpumask *mask; unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; @@ -438,37 +434,17 @@ static 
int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - case SYSTEM_AMD_MSR_CAPABLE: - cmd.type = SYSTEM_AMD_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_AMD_PERF_CTL; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - default: - return -ENODEV; - } + /* + * The core won't allow CPUs to go away until the governor has been + * stopped, so we can rely on the stability of policy->cpus. + */ + mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ? + cpumask_of(policy->cpu) : policy->cpus; - /* cpufreq holds the hotplug lock, so we are safe from here on */ - if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cmd.mask = policy->cpus; - else - cmd.mask = cpumask_of(policy->cpu); - - drv_write(&cmd); + drv_write(data, mask, perf->states[next_perf_state].control); if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, data->freq_table[index].frequency, + if (!check_freqs(mask, data->freq_table[index].frequency, data)) { pr_debug("acpi_cpufreq_target failed (%d)\n", policy->cpu); @@ -738,15 +714,21 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; + data->cpu_freq_read = cpu_freq_read_io; + data->cpu_freq_write = cpu_freq_write_io; break; case ACPI_ADR_SPACE_FIXED_HARDWARE: pr_debug("HARDWARE addr space\n"); if (check_est_cpu(cpu)) { data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + data->cpu_freq_read = cpu_freq_read_intel; + data->cpu_freq_write = cpu_freq_write_intel; break; } if (check_amd_hwpstate_cpu(cpu)) { 
data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; + data->cpu_freq_read = cpu_freq_read_amd; + data->cpu_freq_write = cpu_freq_write_amd; break; } result = -ENODEV; From 34e2c555f3e13c90e9284e23d00f03be8a6e06c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 20:20:42 +0100 Subject: [PATCH 28/94] cpufreq: Add mechanism for registering utilization update callbacks Introduce a mechanism by which parts of the cpufreq subsystem ("setpolicy" drivers or the core) can register callbacks to be executed from cpufreq_update_util() which is invoked by the scheduler's update_load_avg() on CPU utilization changes. This allows the "setpolicy" drivers to dispense with their timers and do all of the computations they need and frequency/voltage adjustments in the update_load_avg() code path, among other things. The update_load_avg() changes were suggested by Peter Zijlstra. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar --- drivers/cpufreq/cpufreq.c | 45 +++++++++++++++++++++++++++++++++++++++ include/linux/cpufreq.h | 34 +++++++++++++++++++++++++++++ kernel/sched/deadline.c | 4 ++++ kernel/sched/fair.c | 26 +++++++++++++++++++++- kernel/sched/rt.c | 4 ++++ kernel/sched/sched.h | 1 + 6 files changed, 113 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 34b17447e0d1..e172b2a02c1d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list); static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); + +static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * @cpu: The CPU to set the pointer for. + * @data: New pointer value. 
+ * + * Set and publish the update_util_data pointer for the given CPU. That pointer + * points to a struct update_util_data object containing a callback function + * to call from cpufreq_update_util(). That function will be called from an RCU + * read-side critical section, so it must not sleep. + * + * Callers must use RCU callbacks to free any memory that might be accessed + * via the old update_util_data pointer or invoke synchronize_rcu() right after + * this function to avoid use-after-free. + */ +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +{ + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); +} +EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); + +/** + * cpufreq_update_util - Take a note about CPU utilization changes. + * @time: Current time. + * @util: Current utilization. + * @max: Utilization ceiling. + * + * This function is called by the scheduler on every invocation of + * update_load_avg() on the CPU whose utilization is being updated. + */ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) +{ + struct update_util_data *data; + + rcu_read_lock(); + + data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data && data->func) + data->func(data, time, util, max); + + rcu_read_unlock(); +} + DEFINE_MUTEX(cpufreq_governor_lock); /* Flag to suspend/resume CPUFreq governors */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbf..704d85bf7242 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. 
+ * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. + * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). + * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} + +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); + unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else +static inline void cpufreq_update_util(u64 time, unsigned long util, + unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} + static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index cd64c979d0e1..21a0aa6f810d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq) if (!dl_task(curr) || !on_dl_rq(dl_se)) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). 
*/ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + /* * Consumed budget is computed considering the time as * observed by schedulable tasks (excluding time spent diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 56b7d4b83947..e2987a7e489d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) { struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 now = cfs_rq_clock_task(cfs_rq); - int cpu = cpu_of(rq_of(cfs_rq)); + struct rq *rq = rq_of(cfs_rq); + int cpu = cpu_of(rq); /* * Track task load average for carrying it to new CPU after migrated, and @@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) update_tg_load_avg(cfs_rq, 0); + + if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { + unsigned long max = rq->cpu_capacity_orig; + + /* + * There are a few boundary cases this might miss but it should + * get called often enough that that should (hopefully) not be + * a real problem -- added to that it only calls on the local + * CPU, so if we enqueue remotely we'll miss an update, but + * the next tick/schedule should update. + * + * It will not get called when we go idle, because the idle + * thread is a different class (!fair), nor will the utilization + * number include things like RT tasks. + * + * As is, the util number is not freq-invariant (we'd have to + * implement arch_scale_freq_capacity() for that). + * + * See cpu_util(). 
+ */ + cpufreq_update_util(rq_clock(rq), + min(cfs_rq->avg.util_avg, max), max); + } } static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8ec86abe0ea1..27f5b03cbdbe 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq) if (curr->sched_class != &rt_sched_class) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + delta_exec = rq_clock_task(rq) - curr->se.exec_start; if (unlikely((s64)delta_exec <= 0)) return; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 10f16374df7f..f042190c8002 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "cpupri.h" #include "cpudeadline.h" From a4675fbc4a7abe072ac6ba38c252f22a91ebcd94 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Feb 2016 01:45:30 +0100 Subject: [PATCH 29/94] cpufreq: intel_pstate: Replace timers with utilization update callbacks Instead of using a per-CPU deferrable timer for utilization sampling and P-states adjustments, register a utilization update callback that will be invoked from the scheduler on utilization changes. The sampling rate is still the same as what was used for the deferrable timers, so the functional impact of this patch should not be significant. Based on an earlier patch from Srinivas Pandruvada. Signed-off-by: Rafael J. 
Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 103 +++++++++++++-------------------- 1 file changed, 39 insertions(+), 64 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cd83d477e32d..f4d85c2ae7b1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -71,7 +71,7 @@ struct sample { u64 mperf; u64 tsc; int freq; - ktime_t time; + u64 time; }; struct pstate_data { @@ -103,13 +103,13 @@ struct _pid { struct cpudata { int cpu; - struct timer_list timer; + struct update_util_data update_util; struct pstate_data pstate; struct vid_data vid; struct _pid pid; - ktime_t last_sample_time; + u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; @@ -120,6 +120,7 @@ struct cpudata { static struct cpudata **all_cpu_data; struct pstate_adjust_policy { int sample_rate_ms; + s64 sample_rate_ns; int deadband; int setpoint; int p_gain_pct; @@ -712,7 +713,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) if (limits->no_turbo && !limits->turbo_disabled) val |= (u64)1 << 32; - wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); + wrmsrl(MSR_IA32_PERF_CTL, val); } static int knl_get_turbo_pstate(void) @@ -883,7 +884,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) sample->core_pct_busy = (int32_t)core_pct; } -static inline void intel_pstate_sample(struct cpudata *cpu) +static inline void intel_pstate_sample(struct cpudata *cpu, u64 time) { u64 aperf, mperf; unsigned long flags; @@ -900,7 +901,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; - cpu->sample.time = ktime_get(); + cpu->sample.time = time; cpu->sample.aperf = aperf; cpu->sample.mperf = mperf; cpu->sample.tsc = tsc; @@ -915,22 +916,6 @@ static inline void intel_pstate_sample(struct cpudata *cpu) cpu->prev_tsc = tsc; } -static inline void intel_hwp_set_sample_time(struct cpudata *cpu) -{ - 
int delay; - - delay = msecs_to_jiffies(50); - mod_timer_pinned(&cpu->timer, jiffies + delay); -} - -static inline void intel_pstate_set_sample_time(struct cpudata *cpu) -{ - int delay; - - delay = msecs_to_jiffies(pid_params.sample_rate_ms); - mod_timer_pinned(&cpu->timer, jiffies + delay); -} - static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; @@ -970,8 +955,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; - s64 duration_us; - u32 sample_time; + u64 duration_ns; /* * core_busy is the ratio of actual performance to max @@ -990,18 +974,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); /* - * Since we have a deferred timer, it will not fire unless - * we are in C0. So, determine if the actual elapsed time - * is significantly greater (3x) than our sample interval. If it - * is, then we were idle for a long enough period of time - * to adjust our busyness. + * Since our utilization update callback will not run unless we are + * in C0, check if the actual elapsed time is significantly greater (3x) + * than our sample interval. If it is, then we were idle for a long + * enough period of time to adjust our busyness. 
*/ - sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; - duration_us = ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); - if (duration_us > sample_time * 3) { - sample_ratio = div_fp(int_tofp(sample_time), - int_tofp(duration_us)); + duration_ns = cpu->sample.time - cpu->last_sample_time; + if ((s64)duration_ns > pid_params.sample_rate_ns * 3 + && cpu->last_sample_time > 0) { + sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), + int_tofp(duration_ns)); core_busy = mul_fp(core_busy, sample_ratio); } @@ -1031,23 +1013,17 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->freq); } -static void intel_hwp_timer_func(unsigned long __data) +static void intel_pstate_update_util(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max) { - struct cpudata *cpu = (struct cpudata *) __data; + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; - intel_pstate_sample(cpu); - intel_hwp_set_sample_time(cpu); -} - -static void intel_pstate_timer_func(unsigned long __data) -{ - struct cpudata *cpu = (struct cpudata *) __data; - - intel_pstate_sample(cpu); - - intel_pstate_adjust_busy_pstate(cpu); - - intel_pstate_set_sample_time(cpu); + if ((s64)delta_ns >= pid_params.sample_rate_ns) { + intel_pstate_sample(cpu, time); + if (!hwp_active) + intel_pstate_adjust_busy_pstate(cpu); + } } #define ICPU(model, policy) \ @@ -1095,24 +1071,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu->cpu = cpunum; - if (hwp_active) + if (hwp_active) { intel_pstate_hwp_enable(cpu); + pid_params.sample_rate_ms = 50; + pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; + } intel_pstate_get_cpu_pstates(cpu); - init_timer_deferrable(&cpu->timer); - cpu->timer.data = (unsigned long)cpu; - cpu->timer.expires = jiffies + HZ/100; - - if (!hwp_active) - cpu->timer.function = intel_pstate_timer_func; - else - cpu->timer.function = intel_hwp_timer_func; - 
intel_pstate_busy_pid_reset(cpu); - intel_pstate_sample(cpu); + intel_pstate_sample(cpu, 0); - add_timer_on(&cpu->timer, cpunum); + cpu->update_util.func = intel_pstate_update_util; + cpufreq_set_update_util_data(cpunum, &cpu->update_util); pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); @@ -1196,7 +1167,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); - del_timer_sync(&all_cpu_data[cpu_num]->timer); + cpufreq_set_update_util_data(cpu_num, NULL); + synchronize_rcu(); + if (hwp_active) return; @@ -1260,6 +1233,7 @@ static int intel_pstate_msrs_not_valid(void) static void copy_pid_params(struct pstate_adjust_policy *policy) { pid_params.sample_rate_ms = policy->sample_rate_ms; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; pid_params.p_gain_pct = policy->p_gain_pct; pid_params.i_gain_pct = policy->i_gain_pct; pid_params.d_gain_pct = policy->d_gain_pct; @@ -1451,7 +1425,8 @@ out: get_online_cpus(); for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { - del_timer_sync(&all_cpu_data[cpu]->timer); + cpufreq_set_update_util_data(cpu, NULL); + synchronize_rcu(); kfree(all_cpu_data[cpu]); } } From 9be4fd2c7723a3057b0b39676fe4c8d5fd7118a4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Feb 2016 16:53:50 +0100 Subject: [PATCH 30/94] cpufreq: governor: Replace timers with utilization update callbacks Instead of using a per-CPU deferrable timer for queuing up governor work items, register a utilization update callback that will be invoked from the scheduler on utilization changes. The sampling rate is still the same as what was used for the deferrable timers and the added irq_work overhead should be offset by the eliminated timers overhead, so in theory the functional impact of this patch should not be significant. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Tested-by: Gautham R. 
Shenoy --- drivers/cpufreq/Kconfig | 1 + drivers/cpufreq/cpufreq_conservative.c | 6 +- drivers/cpufreq/cpufreq_governor.c | 167 ++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 19 +-- drivers/cpufreq/cpufreq_ondemand.c | 43 ++++--- 5 files changed, 115 insertions(+), 121 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 659879a56dba..dcb972a38fbc 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -3,6 +3,7 @@ menu "CPU Frequency scaling" config CPU_FREQ bool "CPU Frequency scaling" select SRCU + select IRQ_WORK help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 8504a70a4785..bc002c8cba90 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -112,14 +112,12 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all) +static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - if (modify_all) - dbs_check_cpu(dbs_data, policy->cpu); - + dbs_check_cpu(dbs_data, policy->cpu); return delay_for_sampling_rate(cs_tuners->sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e0d111024d48..6bc2f50cc1d9 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -128,10 +128,10 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) * dropped down. So we perform the copy only once, upon the * first wake-up from idle.) * - * Detecting this situation is easy: the governor's deferrable - * timer would not have fired during CPU-idle periods. 
Hence - * an unusually large 'wall_time' (as compared to the sampling - * rate) indicates this scenario. + * Detecting this situation is easy: the governor's utilization + * update handler would not have run during CPU-idle periods. + * Hence, an unusually large 'wall_time' (as compared to the + * sampling rate) indicates this scenario. * * prev_load can be zero in two cases and we must recalculate it * for both cases: @@ -161,72 +161,48 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay) +void gov_set_update_util(struct cpu_common_dbs_info *shared, + unsigned int delay_us) { + struct cpufreq_policy *policy = shared->policy; struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs; int cpu; + gov_update_sample_delay(shared, delay_us); + shared->last_sample_time = 0; + for_each_cpu(cpu, policy->cpus) { - cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - cdbs->timer.expires = jiffies + delay; - add_timer_on(&cdbs->timer, cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + + cpufreq_set_update_util_data(cpu, &cdbs->update_util); } } -EXPORT_SYMBOL_GPL(gov_add_timers); +EXPORT_SYMBOL_GPL(gov_set_update_util); -static inline void gov_cancel_timers(struct cpufreq_policy *policy) +static inline void gov_clear_update_util(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs; int i; - for_each_cpu(i, policy->cpus) { - cdbs = dbs_data->cdata->get_cpu_cdbs(i); - del_timer_sync(&cdbs->timer); - } + for_each_cpu(i, policy->cpus) + cpufreq_set_update_util_data(i, NULL); + + synchronize_rcu(); } -void gov_cancel_work(struct cpu_common_dbs_info *shared) +static void gov_cancel_work(struct cpu_common_dbs_info *shared) { - /* Tell dbs_timer_handler() to skip queuing up work items. */ + /* Tell dbs_update_util_handler() to skip queuing up work items. 
*/ atomic_inc(&shared->skip_work); /* - * If dbs_timer_handler() is already running, it may not notice the - * incremented skip_work, so wait for it to complete to prevent its work - * item from being queued up after the cancel_work_sync() below. - */ - gov_cancel_timers(shared->policy); - /* - * In case dbs_timer_handler() managed to run and spawn a work item - * before the timers have been canceled, wait for that work item to - * complete and then cancel all of the timers set up by it. If - * dbs_timer_handler() runs again at that point, it will see the - * positive value of skip_work and won't spawn any more work items. + * If dbs_update_util_handler() is already running, it may not notice + * the incremented skip_work, so wait for it to complete to prevent its + * work item from being queued up after the cancel_work_sync() below. */ + gov_clear_update_util(shared->policy); + irq_work_sync(&shared->irq_work); cancel_work_sync(&shared->work); - gov_cancel_timers(shared->policy); atomic_set(&shared->skip_work, 0); } -EXPORT_SYMBOL_GPL(gov_cancel_work); - -/* Will return if we need to evaluate cpu load again or not */ -static bool need_load_eval(struct cpu_common_dbs_info *shared, - unsigned int sampling_rate) -{ - if (policy_is_shared(shared->policy)) { - ktime_t time_now = ktime_get(); - s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); - - /* Do nothing if we recently have sampled */ - if (delta_us < (s64)(sampling_rate / 2)) - return false; - else - shared->time_stamp = time_now; - } - - return true; -} static void dbs_work_handler(struct work_struct *work) { @@ -234,56 +210,70 @@ static void dbs_work_handler(struct work_struct *work) cpu_common_dbs_info, work); struct cpufreq_policy *policy; struct dbs_data *dbs_data; - unsigned int sampling_rate, delay; - bool eval_load; + unsigned int delay; policy = shared->policy; dbs_data = policy->governor_data; - /* Kill all timers */ - gov_cancel_timers(policy); - - if (dbs_data->cdata->governor == 
GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - - sampling_rate = cs_tuners->sampling_rate; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - - sampling_rate = od_tuners->sampling_rate; - } - - eval_load = need_load_eval(shared, sampling_rate); - /* - * Make sure cpufreq_governor_limits() isn't evaluating load in - * parallel. + * Make sure cpufreq_governor_limits() isn't evaluating load or the + * ondemand governor isn't updating the sampling rate in parallel. */ mutex_lock(&shared->timer_mutex); - delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load); + delay = dbs_data->cdata->gov_dbs_timer(policy); + shared->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&shared->timer_mutex); + /* + * If the atomic operation below is reordered with respect to the + * sample delay modification, the utilization update handler may end + * up using a stale sample delay value. + */ + smp_mb__before_atomic(); atomic_dec(&shared->skip_work); - - gov_add_timers(policy, delay); } -static void dbs_timer_handler(unsigned long data) +static void dbs_irq_work(struct irq_work *irq_work) { - struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data; + struct cpu_common_dbs_info *shared; + + shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work); + schedule_work(&shared->work); +} + +static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared) +{ +#ifdef CONFIG_SMP + irq_work_queue_on(&shared->irq_work, smp_processor_id()); +#else + irq_work_queue(&shared->irq_work); +#endif +} + +static void dbs_update_util_handler(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max) +{ + struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct cpu_common_dbs_info *shared = cdbs->shared; /* - * Timer handler may not be allowed to queue the work at the moment, - * because: - * - Another timer handler has done that - * - We are stopping the governor - * - Or we are 
updating the sampling rate of the ondemand governor + * The work may not be allowed to be queued up right now. + * Possible reasons: + * - Work has already been queued up or is in progress. + * - The governor is being stopped. + * - It is too early (too little time from the previous sample). */ - if (atomic_inc_return(&shared->skip_work) > 1) - atomic_dec(&shared->skip_work); - else - queue_work(system_wq, &shared->work); + if (atomic_inc_return(&shared->skip_work) == 1) { + u64 delta_ns; + + delta_ns = time - shared->last_sample_time; + if ((s64)delta_ns >= shared->sample_delay_ns) { + shared->last_sample_time = time; + gov_queue_irq_work(shared); + return; + } + } + atomic_dec(&shared->skip_work); } static void set_sampling_rate(struct dbs_data *dbs_data, @@ -315,6 +305,7 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy, mutex_init(&shared->timer_mutex); atomic_set(&shared->skip_work, 0); + init_irq_work(&shared->irq_work, dbs_irq_work); INIT_WORK(&shared->work, dbs_work_handler); return 0; } @@ -467,9 +458,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, io_busy = od_tuners->io_is_busy; } - shared->policy = policy; - shared->time_stamp = ktime_get(); - for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; @@ -485,10 +473,9 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - __setup_timer(&j_cdbs->timer, dbs_timer_handler, - (unsigned long)j_cdbs, - TIMER_DEFERRABLE | TIMER_IRQSAFE); + j_cdbs->update_util.func = dbs_update_util_handler; } + shared->policy = policy; if (cdata->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -505,7 +492,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, od_ops->powersave_bias_init_cpu(cpu); } - gov_add_timers(policy, delay_for_sampling_rate(sampling_rate)); + gov_set_update_util(shared, 
sampling_rate); return 0; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 91e767a058a7..541777192dbc 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -18,6 +18,7 @@ #define _CPUFREQ_GOVERNOR_H #include +#include #include #include #include @@ -138,11 +139,19 @@ struct cpu_common_dbs_info { */ struct mutex timer_mutex; - ktime_t time_stamp; + u64 last_sample_time; + s64 sample_delay_ns; atomic_t skip_work; + struct irq_work irq_work; struct work_struct work; }; +static inline void gov_update_sample_delay(struct cpu_common_dbs_info *shared, + unsigned int delay_us) +{ + shared->sample_delay_ns = delay_us * NSEC_PER_USEC; +} + /* Per cpu structures */ struct cpu_dbs_info { u64 prev_cpu_idle; @@ -155,7 +164,7 @@ struct cpu_dbs_info { * wake-up from idle. */ unsigned int prev_load; - struct timer_list timer; + struct update_util_data update_util; struct cpu_common_dbs_info *shared; }; @@ -212,8 +221,7 @@ struct common_dbs_data { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); - unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy, - bool modify_all); + unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); @@ -270,9 +278,6 @@ static ssize_t show_sampling_rate_min_gov_pol \ } extern struct mutex cpufreq_governor_lock; - -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay); -void gov_cancel_work(struct cpu_common_dbs_info *shared); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 929e193ac1c1..da7f3514d948 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ 
b/drivers/cpufreq/cpufreq_ondemand.c @@ -189,7 +189,7 @@ static void od_check_cpu(int cpu, unsigned int load) } } -static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) +static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; unsigned int cpu = policy->cpu; @@ -198,9 +198,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; - if (!modify_all) - goto max_delay; - /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { @@ -216,7 +213,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) } } -max_delay: if (!delay) delay = delay_for_sampling_rate(od_tuners->sampling_rate * dbs_info->rate_mult); @@ -262,7 +258,6 @@ static void update_sampling_rate(struct dbs_data *dbs_data, struct od_cpu_dbs_info_s *dbs_info; struct cpu_dbs_info *cdbs; struct cpu_common_dbs_info *shared; - unsigned long next_sampling, appointed_at; dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cdbs = &dbs_info->cdbs; @@ -286,20 +281,28 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * policy will be governed by dbs_data, otherwise there can be * multiple policies that are governed by the same dbs_data. */ - if (dbs_data != policy->governor_data) - continue; - - /* - * Checking this for any CPU should be fine, timers for all of - * them are scheduled together. 
- */ - next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->cdbs.timer.expires; - - if (time_before(next_sampling, appointed_at)) { - gov_cancel_work(shared); - gov_add_timers(policy, usecs_to_jiffies(new_rate)); - + if (dbs_data == policy->governor_data) { + mutex_lock(&shared->timer_mutex); + /* + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), + * but that really doesn't matter. If the read returns + * a value that's too big, the sample will be skipped, + * but the next invocation of dbs_update_util_handler() + * (when the update has been completed) will take a + * sample. If the returned value is too small, the + * sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect + * immediately anyway. + * + * If this runs in parallel with dbs_work_handler(), we + * may end up overwriting the sample_delay_ns value that + * it has just written, but the difference should not be + * too big and it will be corrected next time a sample + * is taken, so it shouldn't be significant. + */ + gov_update_sample_delay(shared, new_rate); + mutex_unlock(&shared->timer_mutex); } } From 2bb8d94fb03f808022c620f54b602a1e26d5cbac Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:01:31 +0100 Subject: [PATCH 31/94] cpufreq: governor: Use common mutex for dbs_data protection Every governor relying on the common code in cpufreq_governor.c has to provide its own mutex in struct common_dbs_data. However, there actually is no need to have a separate mutex per governor for this purpose, they may be using the same global mutex just fine. Accordingly, introduce a single common mutex for that and drop the mutex field from struct common_dbs_data. That at least will ensure that the mutex is always present and initialized regardless of what the particular governors do. 
Another benefit is that the common code does not need a pointer to a governor-related structure to get to the mutex which sometimes helps. Finally, it makes the code generally easier to follow. Signed-off-by: Rafael J. Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 1 - drivers/cpufreq/cpufreq_governor.c | 7 +++++-- drivers/cpufreq/cpufreq_governor.h | 6 +----- drivers/cpufreq/cpufreq_ondemand.c | 5 ++--- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index bc002c8cba90..8f0c3dbe2867 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -368,7 +368,6 @@ static struct common_dbs_data cs_dbs_cdata = { .gov_check_cpu = cs_check_cpu, .init = cs_init, .exit = cs_exit, - .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), }; static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6bc2f50cc1d9..f291fdd878ce 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,6 +22,9 @@ #include "cpufreq_governor.h" +DEFINE_MUTEX(dbs_data_mutex); +EXPORT_SYMBOL_GPL(dbs_data_mutex); + static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) { if (have_governor_per_policy()) @@ -543,7 +546,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, int ret; /* Lock governor to block concurrent initialization of governor */ - mutex_lock(&cdata->mutex); + mutex_lock(&dbs_data_mutex); if (have_governor_per_policy()) dbs_data = policy->governor_data; @@ -576,7 +579,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, } unlock: - mutex_unlock(&cdata->mutex); + mutex_unlock(&dbs_data_mutex); return ret; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 541777192dbc..a9df62e87fcb 100644 --- 
a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -228,11 +228,6 @@ struct common_dbs_data { /* Governor specific ops, see below */ void *gov_ops; - - /* - * Protects governor's data (struct dbs_data and struct common_dbs_data) - */ - struct mutex mutex; }; /* Governor Per policy data */ @@ -277,6 +272,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ } +extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index da7f3514d948..fac2f8f05bf8 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -249,7 +249,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, /* * Lock governor so that governor start/stop can't execute in parallel. */ - mutex_lock(&od_dbs_cdata.mutex); + mutex_lock(&dbs_data_mutex); cpumask_copy(&cpumask, cpu_online_mask); @@ -306,7 +306,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, } } - mutex_unlock(&od_dbs_cdata.mutex); + mutex_unlock(&dbs_data_mutex); } static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, @@ -552,7 +552,6 @@ static struct common_dbs_data od_dbs_cdata = { .gov_ops = &od_ops, .init = od_init, .exit = od_exit, - .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), }; static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, From 5da3dd1e00918a9ac4b83885453bfa9cad732b44 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 5 Feb 2016 03:15:24 +0100 Subject: [PATCH 32/94] cpufreq: governor: Avoid passing dbs_data pointers around unnecessarily Do not pass struct dbs_data pointers to the family of functions implementing governor operations in cpufreq_governor.c as they can take that pointer from policy->governor_data by themselves. The cpufreq_governor_init() case is slightly more complicated, since policy->governor_data may be NULL when it is invoked, but then it can reach the pointer in question via its cdata argument just fine. While at it, rework cpufreq_governor_dbs() to avoid a pointless policy->governor_data check in the CPUFREQ_GOV_POLICY_INIT case. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 68 ++++++++++++------------------ 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index f291fdd878ce..a329e1bcb6bc 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -329,9 +329,9 @@ static void free_common_dbs_info(struct cpufreq_policy *policy, } static int cpufreq_governor_init(struct cpufreq_policy *policy, - struct dbs_data *dbs_data, struct common_dbs_data *cdata) { + struct dbs_data *dbs_data = cdata->gdbs_data; unsigned int latency; int ret; @@ -403,9 +403,9 @@ free_dbs_data: return ret; } -static int cpufreq_governor_exit(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_exit(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data = policy->governor_data; struct common_dbs_data *cdata = dbs_data->cdata; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); @@ -432,9 +432,9 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy, return 0; } -static int cpufreq_governor_start(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_start(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data =
policy->governor_data; struct common_dbs_data *cdata = dbs_data->cdata; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); @@ -499,9 +499,9 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, return 0; } -static int cpufreq_governor_stop(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_stop(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data = policy->governor_data; struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; @@ -515,9 +515,9 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, return 0; } -static int cpufreq_governor_limits(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_limits(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data = policy->governor_data; struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); @@ -542,45 +542,31 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy, int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event) { - struct dbs_data *dbs_data; - int ret; + int ret = -EINVAL; /* Lock governor to block concurrent initialization of governor */ mutex_lock(&dbs_data_mutex); - if (have_governor_per_policy()) - dbs_data = policy->governor_data; - else - dbs_data = cdata->gdbs_data; - - if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { - ret = -EINVAL; - goto unlock; + if (event == CPUFREQ_GOV_POLICY_INIT) { + ret = cpufreq_governor_init(policy, cdata); + } else if (policy->governor_data) { + switch (event) { + case CPUFREQ_GOV_POLICY_EXIT: + ret = cpufreq_governor_exit(policy); + break; + case CPUFREQ_GOV_START: + ret = cpufreq_governor_start(policy); + break; + case CPUFREQ_GOV_STOP: + ret = cpufreq_governor_stop(policy); + break; + 
case CPUFREQ_GOV_LIMITS: + ret = cpufreq_governor_limits(policy); + break; + } } - switch (event) { - case CPUFREQ_GOV_POLICY_INIT: - ret = cpufreq_governor_init(policy, dbs_data, cdata); - break; - case CPUFREQ_GOV_POLICY_EXIT: - ret = cpufreq_governor_exit(policy, dbs_data); - break; - case CPUFREQ_GOV_START: - ret = cpufreq_governor_start(policy, dbs_data); - break; - case CPUFREQ_GOV_STOP: - ret = cpufreq_governor_stop(policy, dbs_data); - break; - case CPUFREQ_GOV_LIMITS: - ret = cpufreq_governor_limits(policy, dbs_data); - break; - default: - ret = -EINVAL; - } - -unlock: mutex_unlock(&dbs_data_mutex); - return ret; } EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); From af926185231a6e30d11a6035410b61405e203c3b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Feb 2016 03:16:08 +0100 Subject: [PATCH 33/94] cpufreq: governor: Put governor structure into common_dbs_data For the ondemand and conservative governors (generally, governors that use the common code in cpufreq_governor.c), there are two static data structures representing the governor, the struct cpufreq_governor structure (the interface to the cpufreq core) and the struct common_dbs_data one (the interface to the cpufreq_governor.c code). There's no fundamental reason why those two structures have to be separate. Moreover, if the struct cpufreq_governor one is included into struct common_dbs_data, it will be possible to reach the latter from the policy via its policy->governor pointer, so it won't be necessary to pass a separate pointer to it around. For this reason, embed struct cpufreq_governor in struct common_dbs_data. Signed-off-by: Rafael J.
Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 78 ++++++++++++++------------ drivers/cpufreq/cpufreq_governor.h | 3 +- drivers/cpufreq/cpufreq_ondemand.c | 28 ++++----- 3 files changed, 58 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 8f0c3dbe2867..4597f7430c95 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -23,16 +23,6 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - -static struct cpufreq_governor cpufreq_gov_conservative = { - .name = "conservative", - .governor = cs_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -122,30 +112,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - struct cs_cpu_dbs_info_s *dbs_info = - &per_cpu(cs_cpu_dbs_info, freq->cpu); - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - - if (!policy) - return 0; - - /* policy isn't governed by conservative governor */ - if (policy->governor != &cpufreq_gov_conservative) - return 0; - - /* - * we only care if our internally tracked freq moves outside the 'valid' - * ranges of frequency available to us otherwise we do not change it - */ - if (dbs_info->requested_freq > policy->max - || dbs_info->requested_freq < policy->min) - dbs_info->requested_freq = freq->new; - - return 0; -} + void *data); static struct notifier_block cs_cpufreq_notifier_block = { .notifier_call = dbs_cpufreq_notifier, @@ -358,7 +325,16 @@ static void cs_exit(struct dbs_data *dbs_data, bool 
notify) define_get_cpu_dbs_routines(cs_cpu_dbs_info); +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + static struct common_dbs_data cs_dbs_cdata = { + .gov = { + .name = "conservative", + .governor = cs_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + }, .governor = GOV_CONSERVATIVE, .attr_group_gov_sys = &cs_attr_group_gov_sys, .attr_group_gov_pol = &cs_attr_group_gov_pol, @@ -370,20 +346,48 @@ static struct common_dbs_data cs_dbs_cdata = { .exit = cs_exit, }; +#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_cdata.gov) + static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); } +static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cs_cpu_dbs_info_s *dbs_info = + &per_cpu(cs_cpu_dbs_info, freq->cpu); + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); + + if (!policy) + return 0; + + /* policy isn't governed by conservative governor */ + if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) + return 0; + + /* + * we only care if our internally tracked freq moves outside the 'valid' + * ranges of frequency available to us otherwise we do not change it + */ + if (dbs_info->requested_freq > policy->max + || dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = freq->new; + + return 0; +} + static int __init cpufreq_gov_dbs_init(void) { - return cpufreq_register_governor(&cpufreq_gov_conservative); + return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); } static void __exit cpufreq_gov_dbs_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_conservative); + cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); } MODULE_AUTHOR("Alexander Clouter "); @@ -395,7 +399,7 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE struct cpufreq_governor 
*cpufreq_default_governor(void) { - return &cpufreq_gov_conservative; + return CPU_FREQ_GOV_CONSERVATIVE; } fs_initcall(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index a9df62e87fcb..2fa3cf104314 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -206,7 +206,8 @@ struct cs_dbs_tuners { /* Common Governor data across policies */ struct dbs_data; struct common_dbs_data { - /* Common across governors */ + struct cpufreq_governor gov; + #define GOV_ONDEMAND 0 #define GOV_CONSERVATIVE 1 int governor; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index fac2f8f05bf8..836116cd4bad 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -31,8 +31,6 @@ static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); static struct od_ops od_ops; -static struct cpufreq_governor cpufreq_gov_ondemand; - static unsigned int default_powersave_bias; static void ondemand_powersave_bias_init_cpu(int cpu) @@ -541,7 +539,16 @@ static struct od_ops od_ops = { .freq_increase = dbs_freq_increase, }; +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + static struct common_dbs_data od_dbs_cdata = { + .gov = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + }, .governor = GOV_ONDEMAND, .attr_group_gov_sys = &od_attr_group_gov_sys, .attr_group_gov_pol = &od_attr_group_gov_pol, @@ -554,19 +561,14 @@ static struct common_dbs_data od_dbs_cdata = { .exit = od_exit, }; +#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_cdata.gov) + static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); } -static struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = od_cpufreq_governor_dbs, - 
.max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static void od_set_powersave_bias(unsigned int powersave_bias) { struct cpufreq_policy *policy; @@ -592,7 +594,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) policy = shared->policy; cpumask_or(&done, &done, policy->cpus); - if (policy->governor != &cpufreq_gov_ondemand) + if (policy->governor != CPU_FREQ_GOV_ONDEMAND) continue; dbs_data = policy->governor_data; @@ -620,12 +622,12 @@ EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); static int __init cpufreq_gov_dbs_init(void) { - return cpufreq_register_governor(&cpufreq_gov_ondemand); + return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); } static void __exit cpufreq_gov_dbs_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_ondemand); + cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); } MODULE_AUTHOR("Venkatesh Pallipadi "); @@ -637,7 +639,7 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND struct cpufreq_governor *cpufreq_default_governor(void) { - return &cpufreq_gov_ondemand; + return CPU_FREQ_GOV_ONDEMAND; } fs_initcall(cpufreq_gov_dbs_init); From 7bdad34d0890b69c30e8c6a50c9c2311a839fd68 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:05:07 +0100 Subject: [PATCH 34/94] cpufreq: governor: Rename some data types and variables The ondemand and conservative governors are represented by struct common_dbs_data whose name doesn't reflect the purpose it is used for, so rename it to struct dbs_governor and rename variables of that type accordingly. No functional changes. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- drivers/cpufreq/cpufreq_conservative.c | 8 +-- drivers/cpufreq/cpufreq_governor.c | 88 +++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 12 ++-- drivers/cpufreq/cpufreq_ondemand.c | 8 +-- 5 files changed, 59 insertions(+), 59 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index f6b79ab0070b..a7d237b386d3 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -48,7 +48,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct dbs_data *od_data = policy->governor_data; struct od_dbs_tuners *od_tuners = od_data->tuners; struct od_cpu_dbs_info_s *od_info = - od_data->cdata->get_cpu_dbs_info_s(policy->cpu); + od_data->gov->get_cpu_dbs_info_s(policy->cpu); if (!od_info->freq_table) return freq_next; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4597f7430c95..c65ac365a2dd 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -119,7 +119,7 @@ static struct notifier_block cs_cpufreq_notifier_block = { }; /************************** sysfs interface ************************/ -static struct common_dbs_data cs_dbs_cdata; +static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) @@ -328,7 +328,7 @@ define_get_cpu_dbs_routines(cs_cpu_dbs_info); static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); -static struct common_dbs_data cs_dbs_cdata = { +static struct dbs_governor cs_dbs_gov = { .gov = { .name = "conservative", .governor = cs_cpufreq_governor_dbs, @@ -346,12 +346,12 @@ static struct common_dbs_data cs_dbs_cdata = { .exit = cs_exit, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_cdata.gov) +#define CPU_FREQ_GOV_CONSERVATIVE 
(&cs_dbs_gov.gov) static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); + return cpufreq_governor_dbs(policy, &cs_dbs_gov, event); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index a329e1bcb6bc..dc5bb298b449 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -28,14 +28,14 @@ EXPORT_SYMBOL_GPL(dbs_data_mutex); static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) { if (have_governor_per_policy()) - return dbs_data->cdata->attr_group_gov_pol; + return dbs_data->gov->attr_group_gov_pol; else - return dbs_data->cdata->attr_group_gov_sys; + return dbs_data->gov->attr_group_gov_sys; } void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpufreq_policy *policy = cdbs->shared->policy; @@ -44,9 +44,9 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int ignore_nice; unsigned int j; - if (dbs_data->cdata->governor == GOV_ONDEMAND) { + if (dbs_data->gov->governor == GOV_ONDEMAND) { struct od_cpu_dbs_info_s *od_dbs_info = - dbs_data->cdata->get_cpu_dbs_info_s(cpu); + dbs_data->gov->get_cpu_dbs_info_s(cpu); /* * Sometimes, the ondemand governor uses an additional @@ -71,7 +71,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int load; int io_busy = 0; - j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); + j_cdbs = dbs_data->gov->get_cpu_cdbs(j); /* * For the purpose of ondemand, waiting for disk IO is @@ -79,7 +79,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) * not that the system is actually idle. 
So do not add * the iowait time to the cpu idle time. */ - if (dbs_data->cdata->governor == GOV_ONDEMAND) + if (dbs_data->gov->governor == GOV_ONDEMAND) io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); @@ -160,7 +160,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) max_load = load; } - dbs_data->cdata->gov_check_cpu(cpu, max_load); + dbs_data->gov->gov_check_cpu(cpu, max_load); } EXPORT_SYMBOL_GPL(dbs_check_cpu); @@ -175,7 +175,7 @@ void gov_set_update_util(struct cpu_common_dbs_info *shared, shared->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); cpufreq_set_update_util_data(cpu, &cdbs->update_util); } @@ -223,7 +223,7 @@ static void dbs_work_handler(struct work_struct *work) * ondemand governor isn't updating the sampling rate in parallel. */ mutex_lock(&shared->timer_mutex); - delay = dbs_data->cdata->gov_dbs_timer(policy); + delay = dbs_data->gov->gov_dbs_timer(policy); shared->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&shared->timer_mutex); @@ -282,7 +282,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, static void set_sampling_rate(struct dbs_data *dbs_data, unsigned int sampling_rate) { - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + if (dbs_data->gov->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; cs_tuners->sampling_rate = sampling_rate; } else { @@ -292,7 +292,7 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } static int alloc_common_dbs_info(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) + struct dbs_governor *gov) { struct cpu_common_dbs_info *shared; int j; @@ -304,7 +304,7 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy, /* Set shared for all CPUs, online+offline */ for_each_cpu(j, policy->related_cpus) - 
cdata->get_cpu_cdbs(j)->shared = shared; + gov->get_cpu_cdbs(j)->shared = shared; mutex_init(&shared->timer_mutex); atomic_set(&shared->skip_work, 0); @@ -314,24 +314,24 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy, } static void free_common_dbs_info(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) + struct dbs_governor *gov) { - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; int j; mutex_destroy(&shared->timer_mutex); for_each_cpu(j, policy->cpus) - cdata->get_cpu_cdbs(j)->shared = NULL; + gov->get_cpu_cdbs(j)->shared = NULL; kfree(shared); } static int cpufreq_governor_init(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) + struct dbs_governor *gov) { - struct dbs_data *dbs_data = cdata->gdbs_data; + struct dbs_data *dbs_data = gov->gdbs_data; unsigned int latency; int ret; @@ -343,7 +343,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (WARN_ON(have_governor_per_policy())) return -EINVAL; - ret = alloc_common_dbs_info(policy, cdata); + ret = alloc_common_dbs_info(policy, gov); if (ret) return ret; @@ -356,14 +356,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (!dbs_data) return -ENOMEM; - ret = alloc_common_dbs_info(policy, cdata); + ret = alloc_common_dbs_info(policy, gov); if (ret) goto free_dbs_data; - dbs_data->cdata = cdata; + dbs_data->gov = gov; dbs_data->usage_count = 1; - ret = cdata->init(dbs_data, !policy->governor->initialized); + ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) goto free_common_dbs_info; @@ -379,7 +379,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, latency * LATENCY_MULTIPLIER)); if (!have_governor_per_policy()) - cdata->gdbs_data = dbs_data; + gov->gdbs_data = dbs_data; policy->governor_data = dbs_data; @@ -394,10 +394,10 @@ reset_gdbs_data: policy->governor_data = 
NULL; if (!have_governor_per_policy()) - cdata->gdbs_data = NULL; - cdata->exit(dbs_data, !policy->governor->initialized); + gov->gdbs_data = NULL; + gov->exit(dbs_data, !policy->governor->initialized); free_common_dbs_info: - free_common_dbs_info(policy, cdata); + free_common_dbs_info(policy, gov); free_dbs_data: kfree(dbs_data); return ret; @@ -406,8 +406,8 @@ free_dbs_data: static int cpufreq_governor_exit(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct common_dbs_data *cdata = dbs_data->cdata; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct dbs_governor *gov = dbs_data->gov; + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to INIT */ if (!cdbs->shared || cdbs->shared->policy) @@ -420,24 +420,24 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; if (!have_governor_per_policy()) - cdata->gdbs_data = NULL; + gov->gdbs_data = NULL; - cdata->exit(dbs_data, policy->governor->initialized == 1); + gov->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); } else { policy->governor_data = NULL; } - free_common_dbs_info(policy, cdata); + free_common_dbs_info(policy, gov); return 0; } static int cpufreq_governor_start(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct common_dbs_data *cdata = dbs_data->cdata; + struct dbs_governor *gov = dbs_data->gov; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); struct cpu_common_dbs_info *shared = cdbs->shared; int io_busy = 0; @@ -448,7 +448,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (!shared || shared->policy) return -EBUSY; - if (cdata->governor == GOV_CONSERVATIVE) { + if (gov->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 
sampling_rate = cs_tuners->sampling_rate; @@ -462,7 +462,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) } for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); unsigned int prev_load; j_cdbs->prev_cpu_idle = @@ -480,15 +480,15 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) } shared->policy = policy; - if (cdata->governor == GOV_CONSERVATIVE) { + if (gov->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = - cdata->get_cpu_dbs_info_s(cpu); + gov->get_cpu_dbs_info_s(cpu); cs_dbs_info->down_skip = 0; cs_dbs_info->requested_freq = policy->cur; } else { - struct od_ops *od_ops = cdata->gov_ops; - struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); + struct od_ops *od_ops = gov->gov_ops; + struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu); od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -502,7 +502,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); + struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; /* State should be equivalent to START */ @@ -518,9 +518,9 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - struct common_dbs_data *cdata = dbs_data->cdata; + struct dbs_governor *gov = dbs_data->gov; unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); /* State should be equivalent to START */ if (!cdbs->shared || !cdbs->shared->policy) @@ -540,7 +540,7 @@ 
static int cpufreq_governor_limits(struct cpufreq_policy *policy) } int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct common_dbs_data *cdata, unsigned int event) + struct dbs_governor *gov, unsigned int event) { int ret = -EINVAL; @@ -548,7 +548,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_data_mutex); if (event == CPUFREQ_GOV_POLICY_INIT) { - ret = cpufreq_governor_init(policy, cdata); + ret = cpufreq_governor_init(policy, gov); } else if (policy->governor_data) { switch (event) { case CPUFREQ_GOV_POLICY_EXIT: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 2fa3cf104314..ed87b8442985 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -78,7 +78,7 @@ __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) static ssize_t show_##file_name##_gov_sys \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ + struct _gov##_dbs_tuners *tuners = _gov##_dbs_gov.gdbs_data->tuners; \ return sprintf(buf, "%u\n", tuners->file_name); \ } \ \ @@ -94,7 +94,7 @@ static ssize_t show_##file_name##_gov_pol \ static ssize_t store_##file_name##_gov_sys \ (struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ { \ - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ return store_##file_name(dbs_data, buf, count); \ } \ \ @@ -205,7 +205,7 @@ struct cs_dbs_tuners { /* Common Governor data across policies */ struct dbs_data; -struct common_dbs_data { +struct dbs_governor { struct cpufreq_governor gov; #define GOV_ONDEMAND 0 @@ -233,7 +233,7 @@ struct common_dbs_data { /* Governor Per policy data */ struct dbs_data { - struct common_dbs_data *cdata; + struct dbs_governor *gov; unsigned int min_sampling_rate; int usage_count; void *tuners; @@ -262,7 +262,7 @@ static inline int 
delay_for_sampling_rate(unsigned int sampling_rate) static ssize_t show_sampling_rate_min_gov_sys \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ } \ \ @@ -277,7 +277,7 @@ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct common_dbs_data *cdata, unsigned int event); + struct dbs_governor *gov, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 836116cd4bad..c38a4a1111d4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -219,7 +219,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) } /************************** sysfs interface ************************/ -static struct common_dbs_data od_dbs_cdata; +static struct dbs_governor od_dbs_gov; /** * update_sampling_rate - update sampling rate effective immediately if needed. 
@@ -542,7 +542,7 @@ static struct od_ops od_ops = { static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); -static struct common_dbs_data od_dbs_cdata = { +static struct dbs_governor od_dbs_gov = { .gov = { .name = "ondemand", .governor = od_cpufreq_governor_dbs, @@ -561,12 +561,12 @@ static struct common_dbs_data od_dbs_cdata = { .exit = od_exit, }; -#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_cdata.gov) +#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); + return cpufreq_governor_dbs(policy, &od_dbs_gov, event); } static void od_set_powersave_bias(unsigned int powersave_bias) From 906a6e5aaef24d3c80bf6a06c794c7541aca64be Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:07:51 +0100 Subject: [PATCH 35/94] cpufreq: governor: Rework cpufreq_governor_dbs() Since it is possible to obtain a pointer to struct dbs_governor from a pointer to the struct governor embedded in it via container_of(), the second argument of cpufreq_governor_init() is not necessary. Accordingly, cpufreq_governor_dbs() doesn't need its second argument either and the ->governor callbacks for both the ondemand and conservative governors may be set to cpufreq_governor_dbs() directly. Make that happen. Signed-off-by: Rafael J. 
Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 11 +---------- drivers/cpufreq/cpufreq_governor.c | 10 +++++----- drivers/cpufreq/cpufreq_governor.h | 3 +-- drivers/cpufreq/cpufreq_ondemand.c | 11 +---------- 4 files changed, 8 insertions(+), 27 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c65ac365a2dd..20c82913ef42 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -325,13 +325,10 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) define_get_cpu_dbs_routines(cs_cpu_dbs_info); -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - static struct dbs_governor cs_dbs_gov = { .gov = { .name = "conservative", - .governor = cs_cpufreq_governor_dbs, + .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, @@ -348,12 +345,6 @@ static struct dbs_governor cs_dbs_gov = { #define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &cs_dbs_gov, event); -} - static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index dc5bb298b449..7e579fc42d2a 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -328,9 +328,10 @@ static void free_common_dbs_info(struct cpufreq_policy *policy, kfree(shared); } -static int cpufreq_governor_init(struct cpufreq_policy *policy, - struct dbs_governor *gov) +static int cpufreq_governor_init(struct cpufreq_policy *policy) { + struct dbs_governor *gov = container_of(policy->governor, + struct dbs_governor, gov); struct dbs_data *dbs_data = gov->gdbs_data; unsigned int latency; int ret; @@ -539,8 +540,7 @@ 
static int cpufreq_governor_limits(struct cpufreq_policy *policy) return 0; } -int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct dbs_governor *gov, unsigned int event) +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { int ret = -EINVAL; @@ -548,7 +548,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_data_mutex); if (event == CPUFREQ_GOV_POLICY_INIT) { - ret = cpufreq_governor_init(policy, gov); + ret = cpufreq_governor_init(policy); } else if (policy->governor_data) { switch (event) { case CPUFREQ_GOV_POLICY_EXIT: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ed87b8442985..8e280b8c446a 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -276,8 +276,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct dbs_governor *gov, unsigned int event); +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c38a4a1111d4..dcbcbf441ac1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -539,13 +539,10 @@ static struct od_ops od_ops = { .freq_increase = dbs_freq_increase, }; -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - static struct dbs_governor od_dbs_gov = { .gov = { .name = "ondemand", - .governor = od_cpufreq_governor_dbs, + .governor = cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, @@ -563,12 +560,6 @@ static struct dbs_governor 
od_dbs_gov = { #define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &od_dbs_gov, event); -} - static void od_set_powersave_bias(unsigned int powersave_bias) { struct cpufreq_policy *policy; From ea59ee0dc9796a4e879291cc2f4728d04c499313 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:09:51 +0100 Subject: [PATCH 36/94] cpufreq: governor: Drop the gov pointer from struct dbs_data Since it is possible to obtain a pointer to struct dbs_governor from a pointer to the struct governor embedded in it with the help of container_of(), the additional gov pointer in struct dbs_data isn't really necessary. Drop that pointer and make the code using it reach the dbs_governor object via policy->governor. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 63 ++++++++++++-------------- drivers/cpufreq/cpufreq_governor.h | 8 +++- drivers/cpufreq/cpufreq_ondemand.c | 2 +- 5 files changed, 39 insertions(+), 38 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index a7d237b386d3..6395a5f0ff25 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -48,7 +48,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct dbs_data *od_data = policy->governor_data; struct od_dbs_tuners *od_tuners = od_data->tuners; struct od_cpu_dbs_info_s *od_info = - od_data->gov->get_cpu_dbs_info_s(policy->cpu); + dbs_governor_of(policy)->get_cpu_dbs_info_s(policy->cpu); if (!od_info->freq_table) return freq_next; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 20c82913ef42..7d5f181e1679 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ 
b/drivers/cpufreq/cpufreq_conservative.c @@ -107,7 +107,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - dbs_check_cpu(dbs_data, policy->cpu); + dbs_check_cpu(policy, policy->cpu); return delay_for_sampling_rate(cs_tuners->sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 7e579fc42d2a..d3fa8b31015c 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -25,28 +25,27 @@ DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); -static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) +static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov) { - if (have_governor_per_policy()) - return dbs_data->gov->attr_group_gov_pol; - else - return dbs_data->gov->attr_group_gov_sys; + return have_governor_per_policy() ? + gov->attr_group_gov_pol : gov->attr_group_gov_sys; } -void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) +void dbs_check_cpu(struct cpufreq_policy *policy, int cpu) { - struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); + struct dbs_governor *gov = dbs_governor_of(policy); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); + struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - struct cpufreq_policy *policy = cdbs->shared->policy; unsigned int sampling_rate; unsigned int max_load = 0; unsigned int ignore_nice; unsigned int j; - if (dbs_data->gov->governor == GOV_ONDEMAND) { + if (gov->governor == GOV_ONDEMAND) { struct od_cpu_dbs_info_s *od_dbs_info = - dbs_data->gov->get_cpu_dbs_info_s(cpu); + gov->get_cpu_dbs_info_s(cpu); /* * Sometimes, the ondemand governor uses an additional @@ -71,7 +70,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int load; int io_busy = 0; - 
j_cdbs = dbs_data->gov->get_cpu_cdbs(j); + j_cdbs = gov->get_cpu_cdbs(j); /* * For the purpose of ondemand, waiting for disk IO is @@ -79,7 +78,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) * not that the system is actually idle. So do not add * the iowait time to the cpu idle time. */ - if (dbs_data->gov->governor == GOV_ONDEMAND) + if (gov->governor == GOV_ONDEMAND) io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); @@ -160,7 +159,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) max_load = load; } - dbs_data->gov->gov_check_cpu(cpu, max_load); + gov->gov_check_cpu(cpu, max_load); } EXPORT_SYMBOL_GPL(dbs_check_cpu); @@ -168,14 +167,14 @@ void gov_set_update_util(struct cpu_common_dbs_info *shared, unsigned int delay_us) { struct cpufreq_policy *policy = shared->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct dbs_governor *gov = dbs_governor_of(policy); int cpu; gov_update_sample_delay(shared, delay_us); shared->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); cpufreq_set_update_util_data(cpu, &cdbs->update_util); } @@ -212,18 +211,18 @@ static void dbs_work_handler(struct work_struct *work) struct cpu_common_dbs_info *shared = container_of(work, struct cpu_common_dbs_info, work); struct cpufreq_policy *policy; - struct dbs_data *dbs_data; + struct dbs_governor *gov; unsigned int delay; policy = shared->policy; - dbs_data = policy->governor_data; + gov = dbs_governor_of(policy); /* * Make sure cpufreq_governor_limits() isn't evaluating load or the * ondemand governor isn't updating the sampling rate in parallel. 
*/ mutex_lock(&shared->timer_mutex); - delay = dbs_data->gov->gov_dbs_timer(policy); + delay = gov->gov_dbs_timer(policy); shared->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&shared->timer_mutex); @@ -280,9 +279,10 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, } static void set_sampling_rate(struct dbs_data *dbs_data, - unsigned int sampling_rate) + struct dbs_governor *gov, + unsigned int sampling_rate) { - if (dbs_data->gov->governor == GOV_CONSERVATIVE) { + if (gov->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; cs_tuners->sampling_rate = sampling_rate; } else { @@ -330,8 +330,7 @@ static void free_common_dbs_info(struct cpufreq_policy *policy, static int cpufreq_governor_init(struct cpufreq_policy *policy) { - struct dbs_governor *gov = container_of(policy->governor, - struct dbs_governor, gov); + struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = gov->gdbs_data; unsigned int latency; int ret; @@ -361,7 +360,6 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (ret) goto free_dbs_data; - dbs_data->gov = gov; dbs_data->usage_count = 1; ret = gov->init(dbs_data, !policy->governor->initialized); @@ -376,7 +374,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); - set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, + set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER)); if (!have_governor_per_policy()) @@ -385,7 +383,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) policy->governor_data = dbs_data; ret = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); + get_sysfs_attr(gov)); if (ret) goto reset_gdbs_data; @@ -406,8 +404,8 @@ free_dbs_data: static int 
cpufreq_governor_exit(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = policy->governor_data; - struct dbs_governor *gov = dbs_data->gov; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to INIT */ @@ -416,7 +414,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) if (!--dbs_data->usage_count) { sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); + get_sysfs_attr(gov)); policy->governor_data = NULL; @@ -435,8 +433,8 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) static int cpufreq_governor_start(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = policy->governor_data; - struct dbs_governor *gov = dbs_data->gov; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); struct cpu_common_dbs_info *shared = cdbs->shared; @@ -502,8 +500,8 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs = dbs_data->gov->get_cpu_cdbs(policy->cpu); + struct dbs_governor *gov = dbs_governor_of(policy); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; /* State should be equivalent to START */ @@ -518,8 +516,7 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - struct dbs_governor *gov = dbs_data->gov; + struct dbs_governor *gov = dbs_governor_of(policy); unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); @@ -534,7 +531,7 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) else if 
(policy->min > cdbs->shared->policy->cur) __cpufreq_driver_target(cdbs->shared->policy, policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(dbs_data, cpu); + dbs_check_cpu(policy, cpu); mutex_unlock(&cdbs->shared->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8e280b8c446a..c8b7ec22871c 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -231,9 +231,13 @@ struct dbs_governor { void *gov_ops; }; +static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) +{ + return container_of(policy->governor, struct dbs_governor, gov); +} + /* Governor Per policy data */ struct dbs_data { - struct dbs_governor *gov; unsigned int min_sampling_rate; int usage_count; void *tuners; @@ -275,7 +279,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; -void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +void dbs_check_cpu(struct cpufreq_policy *policy, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index dcbcbf441ac1..65ad39d95e39 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -203,7 +203,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { - dbs_check_cpu(dbs_data, cpu); + dbs_check_cpu(policy, cpu); if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; From e40e7b255e591d0448500c7910ec5693f58026bd Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 10 Feb 2016 17:07:44 +0100 Subject: [PATCH 37/94] cpufreq: governor: Rename cpu_common_dbs_info to policy_dbs_info The struct cpu_common_dbs_info structure represents the per-policy part of the governor data (for the ondemand and conservative governors), but its name doesn't reflect its purpose. Rename it to struct policy_dbs_info and rename variables related to it accordingly. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 134 ++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 8 +- drivers/cpufreq/cpufreq_ondemand.c | 28 +++--- 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 7d5f181e1679..b2df5de6cf92 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -47,7 +47,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index d3fa8b31015c..b425cd3da682 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -163,15 +163,15 @@ void dbs_check_cpu(struct cpufreq_policy *policy, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); -void gov_set_update_util(struct cpu_common_dbs_info *shared, +void gov_set_update_util(struct policy_dbs_info *policy_dbs, unsigned int delay_us) { - struct cpufreq_policy *policy = shared->policy; + struct cpufreq_policy *policy = policy_dbs->policy; struct 
dbs_governor *gov = dbs_governor_of(policy); int cpu; - gov_update_sample_delay(shared, delay_us); - shared->last_sample_time = 0; + gov_update_sample_delay(policy_dbs, delay_us); + policy_dbs->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); @@ -191,40 +191,40 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) synchronize_rcu(); } -static void gov_cancel_work(struct cpu_common_dbs_info *shared) +static void gov_cancel_work(struct policy_dbs_info *policy_dbs) { /* Tell dbs_update_util_handler() to skip queuing up work items. */ - atomic_inc(&shared->skip_work); + atomic_inc(&policy_dbs->skip_work); /* * If dbs_update_util_handler() is already running, it may not notice * the incremented skip_work, so wait for it to complete to prevent its * work item from being queued up after the cancel_work_sync() below. */ - gov_clear_update_util(shared->policy); - irq_work_sync(&shared->irq_work); - cancel_work_sync(&shared->work); - atomic_set(&shared->skip_work, 0); + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->skip_work, 0); } static void dbs_work_handler(struct work_struct *work) { - struct cpu_common_dbs_info *shared = container_of(work, struct - cpu_common_dbs_info, work); + struct policy_dbs_info *policy_dbs; struct cpufreq_policy *policy; struct dbs_governor *gov; unsigned int delay; - policy = shared->policy; + policy_dbs = container_of(work, struct policy_dbs_info, work); + policy = policy_dbs->policy; gov = dbs_governor_of(policy); /* * Make sure cpufreq_governor_limits() isn't evaluating load or the * ondemand governor isn't updating the sampling rate in parallel. 
*/ - mutex_lock(&shared->timer_mutex); + mutex_lock(&policy_dbs->timer_mutex); delay = gov->gov_dbs_timer(policy); - shared->sample_delay_ns = jiffies_to_nsecs(delay); - mutex_unlock(&shared->timer_mutex); + policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay); + mutex_unlock(&policy_dbs->timer_mutex); /* * If the atomic operation below is reordered with respect to the @@ -232,23 +232,23 @@ static void dbs_work_handler(struct work_struct *work) * up using a stale sample delay value. */ smp_mb__before_atomic(); - atomic_dec(&shared->skip_work); + atomic_dec(&policy_dbs->skip_work); } static void dbs_irq_work(struct irq_work *irq_work) { - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; - shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work); - schedule_work(&shared->work); + policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work); + schedule_work(&policy_dbs->work); } -static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared) +static inline void gov_queue_irq_work(struct policy_dbs_info *policy_dbs) { #ifdef CONFIG_SMP - irq_work_queue_on(&shared->irq_work, smp_processor_id()); + irq_work_queue_on(&policy_dbs->irq_work, smp_processor_id()); #else - irq_work_queue(&shared->irq_work); + irq_work_queue(&policy_dbs->irq_work); #endif } @@ -256,7 +256,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, unsigned long util, unsigned long max) { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); - struct cpu_common_dbs_info *shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; /* * The work may not be allowed to be queued up right now. @@ -265,17 +265,17 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * - The governor is being stopped. * - It is too early (too little time from the previous sample). 
*/ - if (atomic_inc_return(&shared->skip_work) == 1) { + if (atomic_inc_return(&policy_dbs->skip_work) == 1) { u64 delta_ns; - delta_ns = time - shared->last_sample_time; - if ((s64)delta_ns >= shared->sample_delay_ns) { - shared->last_sample_time = time; - gov_queue_irq_work(shared); + delta_ns = time - policy_dbs->last_sample_time; + if ((s64)delta_ns >= policy_dbs->sample_delay_ns) { + policy_dbs->last_sample_time = time; + gov_queue_irq_work(policy_dbs); return; } } - atomic_dec(&shared->skip_work); + atomic_dec(&policy_dbs->skip_work); } static void set_sampling_rate(struct dbs_data *dbs_data, @@ -291,41 +291,41 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } -static int alloc_common_dbs_info(struct cpufreq_policy *policy, +static int alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; int j; /* Allocate memory for the common information for policy->cpus */ - shared = kzalloc(sizeof(*shared), GFP_KERNEL); - if (!shared) + policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL); + if (!policy_dbs) return -ENOMEM; - /* Set shared for all CPUs, online+offline */ + /* Set policy_dbs for all CPUs, online+offline */ for_each_cpu(j, policy->related_cpus) - gov->get_cpu_cdbs(j)->shared = shared; + gov->get_cpu_cdbs(j)->policy_dbs = policy_dbs; - mutex_init(&shared->timer_mutex); - atomic_set(&shared->skip_work, 0); - init_irq_work(&shared->irq_work, dbs_irq_work); - INIT_WORK(&shared->work, dbs_work_handler); + mutex_init(&policy_dbs->timer_mutex); + atomic_set(&policy_dbs->skip_work, 0); + init_irq_work(&policy_dbs->irq_work, dbs_irq_work); + INIT_WORK(&policy_dbs->work, dbs_work_handler); return 0; } -static void free_common_dbs_info(struct cpufreq_policy *policy, +static void free_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct cpu_common_dbs_info 
*shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int j; - mutex_destroy(&shared->timer_mutex); + mutex_destroy(&policy_dbs->timer_mutex); for_each_cpu(j, policy->cpus) - gov->get_cpu_cdbs(j)->shared = NULL; + gov->get_cpu_cdbs(j)->policy_dbs = NULL; - kfree(shared); + kfree(policy_dbs); } static int cpufreq_governor_init(struct cpufreq_policy *policy) @@ -343,7 +343,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (WARN_ON(have_governor_per_policy())) return -EINVAL; - ret = alloc_common_dbs_info(policy, gov); + ret = alloc_policy_dbs_info(policy, gov); if (ret) return ret; @@ -356,7 +356,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!dbs_data) return -ENOMEM; - ret = alloc_common_dbs_info(policy, gov); + ret = alloc_policy_dbs_info(policy, gov); if (ret) goto free_dbs_data; @@ -364,7 +364,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) - goto free_common_dbs_info; + goto free_policy_dbs_info; /* policy latency is in ns. 
Convert it to us first */ latency = policy->cpuinfo.transition_latency / 1000; @@ -395,8 +395,8 @@ reset_gdbs_data: if (!have_governor_per_policy()) gov->gdbs_data = NULL; gov->exit(dbs_data, !policy->governor->initialized); -free_common_dbs_info: - free_common_dbs_info(policy, gov); +free_policy_dbs_info: + free_policy_dbs_info(policy, gov); free_dbs_data: kfree(dbs_data); return ret; @@ -409,7 +409,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to INIT */ - if (!cdbs->shared || cdbs->shared->policy) + if (!cdbs->policy_dbs || cdbs->policy_dbs->policy) return -EBUSY; if (!--dbs_data->usage_count) { @@ -427,7 +427,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; } - free_common_dbs_info(policy, gov); + free_policy_dbs_info(policy, gov); return 0; } @@ -437,14 +437,14 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy->governor_data; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - struct cpu_common_dbs_info *shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int io_busy = 0; if (!policy->cur) return -EINVAL; /* State should be equivalent to INIT */ - if (!shared || shared->policy) + if (!policy_dbs || policy_dbs->policy) return -EBUSY; if (gov->governor == GOV_CONSERVATIVE) { @@ -477,7 +477,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) j_cdbs->update_util.func = dbs_update_util_handler; } - shared->policy = policy; + policy_dbs->policy = policy; if (gov->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -494,7 +494,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) od_ops->powersave_bias_init_cpu(cpu); } - gov_set_update_util(shared, sampling_rate); + gov_set_update_util(policy_dbs, 
sampling_rate); return 0; } @@ -502,14 +502,14 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct cpu_common_dbs_info *shared = cdbs->shared; + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; /* State should be equivalent to START */ - if (!shared || !shared->policy) + if (!policy_dbs || !policy_dbs->policy) return -EBUSY; - gov_cancel_work(shared); - shared->policy = NULL; + gov_cancel_work(policy_dbs); + policy_dbs->policy = NULL; return 0; } @@ -521,18 +521,18 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); /* State should be equivalent to START */ - if (!cdbs->shared || !cdbs->shared->policy) + if (!cdbs->policy_dbs || !cdbs->policy_dbs->policy) return -EBUSY; - mutex_lock(&cdbs->shared->timer_mutex); - if (policy->max < cdbs->shared->policy->cur) - __cpufreq_driver_target(cdbs->shared->policy, policy->max, + mutex_lock(&cdbs->policy_dbs->timer_mutex); + if (policy->max < cdbs->policy_dbs->policy->cur) + __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cdbs->shared->policy->cur) - __cpufreq_driver_target(cdbs->shared->policy, policy->min, + else if (policy->min > cdbs->policy_dbs->policy->cur) + __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(policy, cpu); - mutex_unlock(&cdbs->shared->timer_mutex); + mutex_unlock(&cdbs->policy_dbs->timer_mutex); return 0; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index c8b7ec22871c..c90a2d3766fd 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -131,7 +131,7 @@ static void *get_cpu_dbs_info_s(int cpu) \ */ /* Common to all CPUs of a policy */ -struct cpu_common_dbs_info { +struct policy_dbs_info { struct cpufreq_policy 
*policy; /* * Per policy mutex that serializes load evaluation from limit-change @@ -146,10 +146,10 @@ struct cpu_common_dbs_info { struct work_struct work; }; -static inline void gov_update_sample_delay(struct cpu_common_dbs_info *shared, +static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, unsigned int delay_us) { - shared->sample_delay_ns = delay_us * NSEC_PER_USEC; + policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC; } /* Per cpu structures */ @@ -165,7 +165,7 @@ struct cpu_dbs_info { */ unsigned int prev_load; struct update_util_data update_util; - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; }; struct od_cpu_dbs_info_s { diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 65ad39d95e39..4a2332733cca 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -151,7 +151,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -255,20 +255,20 @@ static void update_sampling_rate(struct dbs_data *dbs_data, struct cpufreq_policy *policy; struct od_cpu_dbs_info_s *dbs_info; struct cpu_dbs_info *cdbs; - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cdbs = &dbs_info->cdbs; - shared = cdbs->shared; + policy_dbs = cdbs->policy_dbs; /* - * A valid shared and shared->policy means governor hasn't - * stopped or exited yet. + * A valid policy_dbs and policy_dbs->policy means governor + * hasn't stopped or exited yet. 
*/ - if (!shared || !shared->policy) + if (!policy_dbs || !policy_dbs->policy) continue; - policy = shared->policy; + policy = policy_dbs->policy; /* clear all CPUs of this policy */ cpumask_andnot(&cpumask, &cpumask, policy->cpus); @@ -280,7 +280,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * multiple policies that are governed by the same dbs_data. */ if (dbs_data == policy->governor_data) { - mutex_lock(&shared->timer_mutex); + mutex_lock(&policy_dbs->timer_mutex); /* * On 32-bit architectures this may race with the * sample_delay_ns read in dbs_update_util_handler(), @@ -299,8 +299,8 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * too big and it will be corrected next time a sample * is taken, so it shouldn't be significant. */ - gov_update_sample_delay(shared, new_rate); - mutex_unlock(&shared->timer_mutex); + gov_update_sample_delay(policy_dbs, new_rate); + mutex_unlock(&policy_dbs->timer_mutex); } } @@ -573,16 +573,16 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; if (cpumask_test_cpu(cpu, &done)) continue; - shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; - if (!shared) + policy_dbs = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy_dbs; + if (!policy_dbs) continue; - policy = shared->policy; + policy = policy_dbs->policy; cpumask_or(&done, &done, policy->cpus); if (policy->governor != CPU_FREQ_GOV_ONDEMAND) From d10b5eb5fce436ba22443ab83eeb36e195dbf772 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 6 Feb 2016 13:50:24 +0100 Subject: [PATCH 38/94] cpufreq: governor: Drop cpu argument from dbs_check_cpu() Since policy->cpu is always passed as the second argument to dbs_check_cpu(), it is not really necessary to pass it, because the function can obtain that value via its first argument just fine. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 8 ++++---- drivers/cpufreq/cpufreq_governor.h | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 6 ++---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index b2df5de6cf92..b8054e53a37e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -107,7 +107,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - dbs_check_cpu(policy, policy->cpu); + dbs_check_cpu(policy); return delay_for_sampling_rate(cs_tuners->sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b425cd3da682..431d81f7963c 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -31,8 +31,9 @@ static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov) gov->attr_group_gov_pol : gov->attr_group_gov_sys; } -void dbs_check_cpu(struct cpufreq_policy *policy, int cpu) +void dbs_check_cpu(struct cpufreq_policy *policy) { + int cpu = policy->cpu; struct dbs_governor *gov = dbs_governor_of(policy); struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); struct dbs_data *dbs_data = policy->governor_data; @@ -517,8 +518,7 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); /* State should be equivalent to START */ if (!cdbs->policy_dbs || !cdbs->policy_dbs->policy) @@ -531,7 +531,7 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) else if (policy->min > 
cdbs->policy_dbs->policy->cur) __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(policy, cpu); + dbs_check_cpu(policy); mutex_unlock(&cdbs->policy_dbs->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index c90a2d3766fd..63868d7f14f5 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -279,7 +279,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; -void dbs_check_cpu(struct cpufreq_policy *policy, int cpu); +void dbs_check_cpu(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4a2332733cca..9ef4402644c7 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -190,9 +190,7 @@ static void od_check_cpu(int cpu, unsigned int load) static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { struct dbs_data *dbs_data = policy->governor_data; - unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - cpu); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; @@ -203,7 +201,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { - dbs_check_cpu(policy, cpu); + dbs_check_cpu(policy); if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; From e9751894000af398d5895b3ee96052f57b80cc44 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 7 Feb 2016 16:23:49 +0100 Subject: [PATCH 39/94] cpufreq: governor: Simplify cpufreq_governor_limits() Use the observation that cpufreq_governor_limits() doesn't have to get to the policy object it wants to manipulate by walking the reference chain cdbs->policy_dbs->policy, as the final pointer is actually equal to its argument, and make it access the policy object directly via its argument. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 431d81f7963c..ff247a7ac774 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -519,20 +519,19 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; /* State should be equivalent to START */ - if (!cdbs->policy_dbs || !cdbs->policy_dbs->policy) + if (!policy_dbs || !policy_dbs->policy) return -EBUSY; - mutex_lock(&cdbs->policy_dbs->timer_mutex); - if (policy->max < cdbs->policy_dbs->policy->cur) - __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > cdbs->policy_dbs->policy->cur) - __cpufreq_driver_target(cdbs->policy_dbs->policy, policy->min, - CPUFREQ_RELATION_L); + mutex_lock(&policy_dbs->timer_mutex); + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(policy); - mutex_unlock(&cdbs->policy_dbs->timer_mutex); + mutex_unlock(&policy_dbs->timer_mutex); return 0; } From bc505475b85de9a9903e84ef0b369d4637354201 Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Sun, 7 Feb 2016 16:24:26 +0100 Subject: [PATCH 40/94] cpufreq: governor: Rearrange governor data structures The struct policy_dbs_info objects representing per-policy governor data are not accessible directly from the corresponding policy objects. To access them, one has to get a pointer to the struct cpu_dbs_info of policy->cpu and use the policy_dbs field of that which isn't really straightforward. To address that rearrange the governor data structures so the governor_data pointer in struct cpufreq_policy will point to struct policy_dbs_info (instead of struct dbs_data) and that will contain a pointer to struct dbs_data. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 3 +- drivers/cpufreq/cpufreq_conservative.c | 6 ++- drivers/cpufreq/cpufreq_governor.c | 74 +++++++++++++------------- drivers/cpufreq/cpufreq_governor.h | 27 +++++----- drivers/cpufreq/cpufreq_ondemand.c | 18 ++++--- 5 files changed, 68 insertions(+), 60 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 6395a5f0ff25..82ae1002def1 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -45,7 +45,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, long d_actual, d_reference; struct msr actual, reference; struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu); - struct dbs_data *od_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; struct od_cpu_dbs_info_s *od_info = dbs_governor_of(policy)->get_cpu_dbs_info_s(policy->cpu); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index b8054e53a37e..1a899bb7d1a4 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -48,7 +48,8 
@@ static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; /* @@ -104,7 +105,8 @@ static void cs_check_cpu(int cpu, unsigned int load) static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; dbs_check_cpu(policy); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ff247a7ac774..82e50dcf9feb 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -35,8 +35,8 @@ void dbs_check_cpu(struct cpufreq_policy *policy) { int cpu = policy->cpu; struct dbs_governor *gov = dbs_governor_of(policy); - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int sampling_rate; @@ -95,6 +95,7 @@ void dbs_check_cpu(struct cpufreq_policy *policy) j_cdbs->prev_cpu_idle = cur_idle_time; if (ignore_nice) { + struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); u64 cur_nice; unsigned long cur_nice_jiffies; @@ -292,8 +293,8 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } -static int alloc_policy_dbs_info(struct cpufreq_policy *policy, - struct dbs_governor *gov) +static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, + struct 
dbs_governor *gov) { struct policy_dbs_info *policy_dbs; int j; @@ -301,7 +302,7 @@ static int alloc_policy_dbs_info(struct cpufreq_policy *policy, /* Allocate memory for the common information for policy->cpus */ policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL); if (!policy_dbs) - return -ENOMEM; + return NULL; /* Set policy_dbs for all CPUs, online+offline */ for_each_cpu(j, policy->related_cpus) @@ -311,7 +312,7 @@ static int alloc_policy_dbs_info(struct cpufreq_policy *policy, atomic_set(&policy_dbs->skip_work, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); INIT_WORK(&policy_dbs->work, dbs_work_handler); - return 0; + return policy_dbs; } static void free_policy_dbs_info(struct cpufreq_policy *policy, @@ -333,6 +334,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data = gov->gdbs_data; + struct policy_dbs_info *policy_dbs; unsigned int latency; int ret; @@ -340,26 +342,26 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (policy->governor_data) return -EBUSY; + policy_dbs = alloc_policy_dbs_info(policy, gov); + if (!policy_dbs) + return -ENOMEM; + if (dbs_data) { - if (WARN_ON(have_governor_per_policy())) - return -EINVAL; - - ret = alloc_policy_dbs_info(policy, gov); - if (ret) - return ret; - + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto free_policy_dbs_info; + } dbs_data->usage_count++; - policy->governor_data = dbs_data; + policy_dbs->dbs_data = dbs_data; + policy->governor_data = policy_dbs; return 0; } dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); - if (!dbs_data) - return -ENOMEM; - - ret = alloc_policy_dbs_info(policy, gov); - if (ret) - goto free_dbs_data; + if (!dbs_data) { + ret = -ENOMEM; + goto free_policy_dbs_info; + } dbs_data->usage_count = 1; @@ -381,7 +383,8 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; - 
policy->governor_data = dbs_data; + policy_dbs->dbs_data = dbs_data; + policy->governor_data = policy_dbs; ret = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(gov)); @@ -396,21 +399,21 @@ reset_gdbs_data: if (!have_governor_per_policy()) gov->gdbs_data = NULL; gov->exit(dbs_data, !policy->governor->initialized); + kfree(dbs_data); + free_policy_dbs_info: free_policy_dbs_info(policy, gov); -free_dbs_data: - kfree(dbs_data); return ret; } static int cpufreq_governor_exit(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; /* State should be equivalent to INIT */ - if (!cdbs->policy_dbs || cdbs->policy_dbs->policy) + if (policy_dbs->policy) return -EBUSY; if (!--dbs_data->usage_count) { @@ -435,17 +438,16 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) static int cpufreq_governor_start(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int io_busy = 0; if (!policy->cur) return -EINVAL; /* State should be equivalent to INIT */ - if (!policy_dbs || policy_dbs->policy) + if (policy_dbs->policy) return -EBUSY; if (gov->governor == GOV_CONSERVATIVE) { @@ -501,12 +503,10 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { - struct dbs_governor *gov = dbs_governor_of(policy); - struct cpu_dbs_info *cdbs = 
gov->get_cpu_cdbs(policy->cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; + struct policy_dbs_info *policy_dbs = policy->governor_data; /* State should be equivalent to START */ - if (!policy_dbs || !policy_dbs->policy) + if (!policy_dbs->policy) return -EBUSY; gov_cancel_work(policy_dbs); @@ -517,12 +517,10 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy) static int cpufreq_governor_limits(struct cpufreq_policy *policy) { - struct dbs_governor *gov = dbs_governor_of(policy); - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; + struct policy_dbs_info *policy_dbs = policy->governor_data; /* State should be equivalent to START */ - if (!policy_dbs || !policy_dbs->policy) + if (!policy_dbs->policy) return -EBUSY; mutex_lock(&policy_dbs->timer_mutex); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 63868d7f14f5..95e6834d36a8 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -85,7 +85,8 @@ static ssize_t show_##file_name##_gov_sys \ static ssize_t show_##file_name##_gov_pol \ (struct cpufreq_policy *policy, char *buf) \ { \ - struct dbs_data *dbs_data = policy->governor_data; \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + struct dbs_data *dbs_data = policy_dbs->dbs_data; \ struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ return sprintf(buf, "%u\n", tuners->file_name); \ } @@ -101,8 +102,8 @@ static ssize_t store_##file_name##_gov_sys \ static ssize_t store_##file_name##_gov_pol \ (struct cpufreq_policy *policy, const char *buf, size_t count) \ { \ - struct dbs_data *dbs_data = policy->governor_data; \ - return store_##file_name(dbs_data, buf, count); \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + return store_##file_name(policy_dbs->dbs_data, buf, count); \ } #define show_store_one(_gov, file_name) \ @@ -130,6 +131,13 @@ static void 
*get_cpu_dbs_info_s(int cpu) \ * cs_*: Conservative governor */ +/* Governor demand based switching data (per-policy or global). */ +struct dbs_data { + unsigned int min_sampling_rate; + int usage_count; + void *tuners; +}; + /* Common to all CPUs of a policy */ struct policy_dbs_info { struct cpufreq_policy *policy; @@ -144,6 +152,8 @@ struct policy_dbs_info { atomic_t skip_work; struct irq_work irq_work; struct work_struct work; + /* dbs_data may be shared between multiple policy objects */ + struct dbs_data *dbs_data; }; static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, @@ -204,7 +214,6 @@ struct cs_dbs_tuners { }; /* Common Governor data across policies */ -struct dbs_data; struct dbs_governor { struct cpufreq_governor gov; @@ -236,13 +245,6 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy return container_of(policy->governor, struct dbs_governor, gov); } -/* Governor Per policy data */ -struct dbs_data { - unsigned int min_sampling_rate; - int usage_count; - void *tuners; -}; - /* Governor specific ops, will be passed to dbs_data->gov_ops */ struct od_ops { void (*powersave_bias_init_cpu)(int cpu); @@ -273,7 +275,8 @@ static ssize_t show_sampling_rate_min_gov_sys \ static ssize_t show_sampling_rate_min_gov_pol \ (struct cpufreq_policy *policy, char *buf) \ { \ - struct dbs_data *dbs_data = policy->governor_data; \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + struct dbs_data *dbs_data = policy_dbs->dbs_data; \ return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 9ef4402644c7..b7ef2e7f4d4a 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -78,7 +78,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int jiffies_total, jiffies_hi, jiffies_lo; struct od_cpu_dbs_info_s *dbs_info = 
&per_cpu(od_cpu_dbs_info, policy->cpu); - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; if (!dbs_info->freq_table) { @@ -130,7 +131,8 @@ static void ondemand_powersave_bias_init(void) static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) { - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; if (od_tuners->powersave_bias) @@ -151,8 +153,9 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = dbs_info->cdbs.policy_dbs; + struct cpufreq_policy *policy = policy_dbs->policy; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; dbs_info->freq_lo = 0; @@ -189,7 +192,8 @@ static void od_check_cpu(int cpu, unsigned int load) static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; @@ -277,7 +281,7 @@ static void update_sampling_rate(struct dbs_data *dbs_data, * policy will be governed by dbs_data, otherwise there can be * multiple policies that are governed by the same dbs_data. 
*/ - if (dbs_data == policy->governor_data) { + if (dbs_data == policy_dbs->dbs_data) { mutex_lock(&policy_dbs->timer_mutex); /* * On 32-bit architectures this may race with the @@ -586,7 +590,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) if (policy->governor != CPU_FREQ_GOV_ONDEMAND) continue; - dbs_data = policy->governor_data; + dbs_data = policy_dbs->dbs_data; od_tuners = dbs_data->tuners; od_tuners->powersave_bias = default_powersave_bias; } From cea6a9e77228c261191bc92df0d24bf5356b99ff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2016 16:25:02 +0100 Subject: [PATCH 41/94] cpufreq: governor: Symmetrize cpu_dbs_info initialization and cleanup Make the initialization of struct cpu_dbs_info objects in alloc_policy_dbs_info() and the code that cleans them up in free_policy_dbs_info() more symmetrical. In particular, set/clear the update_util.func field in those functions along with the policy_dbs field. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 82e50dcf9feb..7c08d8360f72 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -304,14 +304,18 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli if (!policy_dbs) return NULL; - /* Set policy_dbs for all CPUs, online+offline */ - for_each_cpu(j, policy->related_cpus) - gov->get_cpu_cdbs(j)->policy_dbs = policy_dbs; - mutex_init(&policy_dbs->timer_mutex); atomic_set(&policy_dbs->skip_work, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); INIT_WORK(&policy_dbs->work, dbs_work_handler); + + /* Set policy_dbs for all CPUs, online+offline */ + for_each_cpu(j, policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + + j_cdbs->policy_dbs = policy_dbs; + 
j_cdbs->update_util.func = dbs_update_util_handler; + } return policy_dbs; } @@ -324,9 +328,12 @@ static void free_policy_dbs_info(struct cpufreq_policy *policy, mutex_destroy(&policy_dbs->timer_mutex); - for_each_cpu(j, policy->cpus) - gov->get_cpu_cdbs(j)->policy_dbs = NULL; + for_each_cpu(j, policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + j_cdbs->policy_dbs = NULL; + j_cdbs->update_util.func = NULL; + } kfree(policy_dbs); } @@ -477,8 +484,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - - j_cdbs->update_util.func = dbs_update_util_handler; } policy_dbs->policy = policy; From 686cc637c99324ad52a6f8e59181f6407405bfe2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Feb 2016 23:41:10 +0100 Subject: [PATCH 42/94] cpufreq: governor: Rename skip_work to work_count The skip_work field in struct policy_dbs_info technically is a counter, so give it a new name to reflect that. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 14 +++++++------- drivers/cpufreq/cpufreq_governor.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 7c08d8360f72..298be52adea0 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -196,16 +196,16 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) static void gov_cancel_work(struct policy_dbs_info *policy_dbs) { /* Tell dbs_update_util_handler() to skip queuing up work items. 
*/ - atomic_inc(&policy_dbs->skip_work); + atomic_inc(&policy_dbs->work_count); /* * If dbs_update_util_handler() is already running, it may not notice - * the incremented skip_work, so wait for it to complete to prevent its + * the incremented work_count, so wait for it to complete to prevent its * work item from being queued up after the cancel_work_sync() below. */ gov_clear_update_util(policy_dbs->policy); irq_work_sync(&policy_dbs->irq_work); cancel_work_sync(&policy_dbs->work); - atomic_set(&policy_dbs->skip_work, 0); + atomic_set(&policy_dbs->work_count, 0); } static void dbs_work_handler(struct work_struct *work) @@ -234,7 +234,7 @@ static void dbs_work_handler(struct work_struct *work) * up using a stale sample delay value. */ smp_mb__before_atomic(); - atomic_dec(&policy_dbs->skip_work); + atomic_dec(&policy_dbs->work_count); } static void dbs_irq_work(struct irq_work *irq_work) @@ -267,7 +267,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * - The governor is being stopped. * - It is too early (too little time from the previous sample). 
*/ - if (atomic_inc_return(&policy_dbs->skip_work) == 1) { + if (atomic_inc_return(&policy_dbs->work_count) == 1) { u64 delta_ns; delta_ns = time - policy_dbs->last_sample_time; @@ -277,7 +277,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, return; } } - atomic_dec(&policy_dbs->skip_work); + atomic_dec(&policy_dbs->work_count); } static void set_sampling_rate(struct dbs_data *dbs_data, @@ -305,7 +305,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli return NULL; mutex_init(&policy_dbs->timer_mutex); - atomic_set(&policy_dbs->skip_work, 0); + atomic_set(&policy_dbs->work_count, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); INIT_WORK(&policy_dbs->work, dbs_work_handler); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 95e6834d36a8..37537220e48c 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -149,7 +149,7 @@ struct policy_dbs_info { u64 last_sample_time; s64 sample_delay_ns; - atomic_t skip_work; + atomic_t work_count; struct irq_work irq_work; struct work_struct work; /* dbs_data may be shared between multiple policy objects */ From fafd5e8ab29d965d6c7db326f2d4189dd9f3b002 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Feb 2016 23:57:22 +0100 Subject: [PATCH 43/94] cpufreq: governor: Drop pointless goto from cpufreq_governor_init() It is silly to jump around "return 0", so don't do that. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 298be52adea0..d6bd402a3237 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -395,12 +395,11 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(gov)); - if (ret) - goto reset_gdbs_data; + if (!ret) + return 0; - return 0; + /* Failure, so roll back. */ -reset_gdbs_data: policy->governor_data = NULL; if (!have_governor_per_policy()) From d0684d3b8934cfb8171755cdb1fc87f4c0335655 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:31 +0530 Subject: [PATCH 44/94] cpufreq: governor: Create generic macro for common tunables Some tunables are present in governor-specific structures, whereas one (min_sampling_rate) is located directly in struct dbs_data. There is a special macro for creating its sysfs attribute and the show/store callbacks, but since more tunables are going to be moved to struct dbs_data, a new generic macro for such cases will be useful, so add it and use it for min_sampling_rate. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Subject & changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 8 +++--- drivers/cpufreq/cpufreq_governor.h | 36 ++++++++++++++------------ drivers/cpufreq/cpufreq_ondemand.c | 8 +++--- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 1a899bb7d1a4..a69eb7eae7ec 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -245,7 +245,7 @@ show_store_one(cs, up_threshold); show_store_one(cs, down_threshold); show_store_one(cs, ignore_nice_load); show_store_one(cs, freq_step); -declare_show_sampling_rate_min(cs); +show_one_common(cs, min_sampling_rate); gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(sampling_down_factor); @@ -253,10 +253,10 @@ gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(down_threshold); gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(freq_step); -gov_sys_pol_attr_ro(sampling_rate_min); +gov_sys_pol_attr_ro(min_sampling_rate); static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, + &min_sampling_rate_gov_sys.attr, &sampling_rate_gov_sys.attr, &sampling_down_factor_gov_sys.attr, &up_threshold_gov_sys.attr, @@ -272,7 +272,7 @@ static struct attribute_group cs_attr_group_gov_sys = { }; static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, + &min_sampling_rate_gov_pol.attr, &sampling_rate_gov_pol.attr, &sampling_down_factor_gov_pol.attr, &up_threshold_gov_pol.attr, diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 37537220e48c..cdf7536ac5fb 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -110,6 +110,26 @@ static ssize_t store_##file_name##_gov_pol \ show_one(_gov, file_name); \ store_one(_gov, file_name) +#define show_one_common(_gov, file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char 
*buf) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct policy_dbs_info *policy_dbs = policy->governor_data; \ + struct dbs_data *dbs_data = policy_dbs->dbs_data; \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} + +#define show_store_one_common(_gov, file_name) \ +show_one_common(_gov, file_name); \ +store_one(_gov, file_name) + /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ @@ -264,22 +284,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) return delay; } -#define declare_show_sampling_rate_min(_gov) \ -static ssize_t show_sampling_rate_min_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} \ - \ -static ssize_t show_sampling_rate_min_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} - extern struct mutex dbs_data_mutex; extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct cpufreq_policy *policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index b7ef2e7f4d4a..8c44bc3fffc5 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -443,7 +443,7 @@ show_store_one(od, up_threshold); show_store_one(od, sampling_down_factor); show_store_one(od, ignore_nice_load); show_store_one(od, powersave_bias); -declare_show_sampling_rate_min(od); +show_one_common(od, min_sampling_rate); gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(io_is_busy); @@ 
-451,10 +451,10 @@ gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(sampling_down_factor); gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(powersave_bias); -gov_sys_pol_attr_ro(sampling_rate_min); +gov_sys_pol_attr_ro(min_sampling_rate); static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, + &min_sampling_rate_gov_sys.attr, &sampling_rate_gov_sys.attr, &up_threshold_gov_sys.attr, &sampling_down_factor_gov_sys.attr, @@ -470,7 +470,7 @@ static struct attribute_group od_attr_group_gov_sys = { }; static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, + &min_sampling_rate_gov_pol.attr, &sampling_rate_gov_pol.attr, &up_threshold_gov_pol.attr, &sampling_down_factor_gov_pol.attr, From ff4b17895e3166084c76ae703cb1c757bcc59799 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:32 +0530 Subject: [PATCH 45/94] cpufreq: governor: Move common tunables to 'struct dbs_data' There are a few common tunables shared between the ondemand and conservative governors. Move them to struct dbs_data to simplify code. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 38 +++++++++++------------- drivers/cpufreq/cpufreq_governor.c | 37 +++++------------------ drivers/cpufreq/cpufreq_governor.h | 14 ++++----- drivers/cpufreq/cpufreq_ondemand.c | 41 +++++++++++--------------- 4 files changed, 47 insertions(+), 83 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index a69eb7eae7ec..4f640b028c94 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -60,7 +60,7 @@ static void cs_check_cpu(int cpu, unsigned int load) return; /* Check for frequency increase */ - if (load > cs_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ @@ -78,7 +78,7 @@ static void cs_check_cpu(int cpu, unsigned int load) } /* if sampling_down_factor is active break out early */ - if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) + if (++dbs_info->down_skip < dbs_data->sampling_down_factor) return; dbs_info->down_skip = 0; @@ -107,10 +107,9 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; dbs_check_cpu(policy); - return delay_for_sampling_rate(cs_tuners->sampling_rate); + return delay_for_sampling_rate(dbs_data->sampling_rate); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -126,7 +125,6 @@ static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -134,14 +132,13 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input 
< 1) return -EINVAL; - cs_tuners->sampling_down_factor = input; + dbs_data->sampling_down_factor = input; return count; } static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -149,7 +146,7 @@ static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, if (ret != 1) return -EINVAL; - cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); + dbs_data->sampling_rate = max(input, dbs_data->min_sampling_rate); return count; } @@ -164,7 +161,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) return -EINVAL; - cs_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; } @@ -178,7 +175,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, /* cannot be lower than 11 otherwise freq will not fall */ if (ret != 1 || input < 11 || input > 100 || - input >= cs_tuners->up_threshold) + input >= dbs_data->up_threshold) return -EINVAL; cs_tuners->down_threshold = input; @@ -188,7 +185,6 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input, j; int ret; @@ -199,10 +195,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1; - if (input == cs_tuners->ignore_nice_load) /* nothing to do */ + if (input == dbs_data->ignore_nice_load) /* nothing to do */ return count; - cs_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -210,7 +206,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_info = 
&per_cpu(cs_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, 0); - if (cs_tuners->ignore_nice_load) + if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } @@ -239,12 +235,12 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(cs, sampling_rate); -show_store_one(cs, sampling_down_factor); -show_store_one(cs, up_threshold); show_store_one(cs, down_threshold); -show_store_one(cs, ignore_nice_load); show_store_one(cs, freq_step); +show_store_one_common(cs, sampling_rate); +show_store_one_common(cs, sampling_down_factor); +show_store_one_common(cs, up_threshold); +show_store_one_common(cs, ignore_nice_load); show_one_common(cs, min_sampling_rate); gov_sys_pol_attr_rw(sampling_rate); @@ -299,11 +295,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) return -ENOMEM; } - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; tuners->freq_step = DEF_FREQUENCY_STEP; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0; dbs_data->tuners = tuners; dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index d6bd402a3237..3569782771ef 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -38,10 +38,9 @@ void dbs_check_cpu(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - unsigned int sampling_rate; + unsigned int sampling_rate = dbs_data->sampling_rate; + 
unsigned int ignore_nice = dbs_data->ignore_nice_load; unsigned int max_load = 0; - unsigned int ignore_nice; unsigned int j; if (gov->governor == GOV_ONDEMAND) { @@ -54,13 +53,8 @@ void dbs_check_cpu(struct cpufreq_policy *policy) * the 'sampling_rate', so as to keep the wake-up-from-idle * detection logic a bit conservative. */ - sampling_rate = od_tuners->sampling_rate; sampling_rate *= od_dbs_info->rate_mult; - ignore_nice = od_tuners->ignore_nice_load; - } else { - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; } /* Get Absolute Load */ @@ -280,19 +274,6 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, atomic_dec(&policy_dbs->work_count); } -static void set_sampling_rate(struct dbs_data *dbs_data, - struct dbs_governor *gov, - unsigned int sampling_rate) -{ - if (gov->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - cs_tuners->sampling_rate = sampling_rate; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - od_tuners->sampling_rate = sampling_rate; - } -} - static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -384,8 +365,8 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); - set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate, - latency * LATENCY_MULTIPLIER)); + dbs_data->sampling_rate = max(dbs_data->min_sampling_rate, + LATENCY_MULTIPLIER * latency); if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; @@ -456,16 +437,12 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (policy_dbs->policy) return -EBUSY; - if (gov->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + sampling_rate = dbs_data->sampling_rate; + ignore_nice = 
dbs_data->ignore_nice_load; - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; - } else { + if (gov->governor == GOV_ONDEMAND) { struct od_dbs_tuners *od_tuners = dbs_data->tuners; - sampling_rate = od_tuners->sampling_rate; - ignore_nice = od_tuners->ignore_nice_load; io_busy = od_tuners->io_is_busy; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index cdf7536ac5fb..e296362d21d2 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -153,9 +153,13 @@ static void *get_cpu_dbs_info_s(int cpu) \ /* Governor demand based switching data (per-policy or global). */ struct dbs_data { - unsigned int min_sampling_rate; int usage_count; void *tuners; + unsigned int min_sampling_rate; + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; }; /* Common to all CPUs of a policy */ @@ -216,19 +220,11 @@ struct cs_cpu_dbs_info_s { /* Per policy Governors sysfs tunables */ struct od_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; unsigned int powersave_bias; unsigned int io_is_busy; }; struct cs_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; unsigned int down_threshold; unsigned int freq_step; }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 8c44bc3fffc5..13c64b662fa1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -110,7 +110,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, dbs_info->freq_lo_jiffies = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); + jiffies_total = usecs_to_jiffies(dbs_data->sampling_rate); jiffies_hi = (freq_avg - freq_lo) * 
jiffies_total; jiffies_hi += ((freq_hi - freq_lo) / 2); jiffies_hi /= (freq_hi - freq_lo); @@ -161,11 +161,10 @@ static void od_check_cpu(int cpu, unsigned int load) dbs_info->freq_lo = 0; /* Check for frequency increase */ - if (load > od_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { /* If switching to max speed, apply sampling_down_factor */ if (policy->cur < policy->max) - dbs_info->rate_mult = - od_tuners->sampling_down_factor; + dbs_info->rate_mult = dbs_data->sampling_down_factor; dbs_freq_increase(policy, policy->max); } else { /* Calculate the next frequency proportional to load */ @@ -195,7 +194,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ @@ -214,7 +212,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) } if (!delay) - delay = delay_for_sampling_rate(od_tuners->sampling_rate + delay = delay_for_sampling_rate(dbs_data->sampling_rate * dbs_info->rate_mult); return delay; @@ -239,11 +237,10 @@ static struct dbs_governor od_dbs_gov; static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cpumask cpumask; int cpu; - od_tuners->sampling_rate = new_rate = max(new_rate, + dbs_data->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate); /* @@ -348,7 +345,6 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -358,21 +354,20 @@ static ssize_t 
store_up_threshold(struct dbs_data *dbs_data, const char *buf, return -EINVAL; } - od_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; } static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input, j; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - od_tuners->sampling_down_factor = input; + dbs_data->sampling_down_factor = input; /* Reset down sampling multiplier in case it was active */ for_each_online_cpu(j) { @@ -399,10 +394,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1; - if (input == od_tuners->ignore_nice_load) { /* nothing to do */ + if (input == dbs_data->ignore_nice_load) { /* nothing to do */ return count; } - od_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -410,7 +405,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); - if (od_tuners->ignore_nice_load) + if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -437,12 +432,12 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(od, sampling_rate); show_store_one(od, io_is_busy); -show_store_one(od, up_threshold); -show_store_one(od, sampling_down_factor); -show_store_one(od, ignore_nice_load); show_store_one(od, powersave_bias); +show_store_one_common(od, sampling_rate); +show_store_one_common(od, up_threshold); +show_store_one_common(od, sampling_down_factor); +show_store_one_common(od, ignore_nice_load); show_one_common(od, min_sampling_rate); 
gov_sys_pol_attr_rw(sampling_rate); @@ -504,7 +499,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) put_cpu(); if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ - tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; /* * In nohz/micro accounting case we set the minimum frequency * not depending on HZ, but fixed (very low). The deferred @@ -512,15 +507,15 @@ static int od_init(struct dbs_data *dbs_data, bool notify) */ dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; } else { - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; /* For correct statistics, we need 10 ticks for each measure */ dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); } - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; tuners->io_is_busy = should_io_be_busy(); From c4435630361d9bebf7154a0c842dc1fb7ae39c99 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:33 +0530 Subject: [PATCH 46/94] cpufreq: governor: New sysfs show/store callbacks for governor tunables The ondemand and conservative governors use the global-attr or freq-attr structures to represent sysfs attributes corresponding to their tunables (which of them is actually used depends on whether or not different policy objects can use the same governor with different tunables at the same time and, consequently, on where those attributes are located in sysfs). Unfortunately, in the freq-attr case, the standard cpufreq show/store sysfs attribute callbacks are applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. 
That may lead to an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. Therefore policy->rwsem cannot be dropped in cpufreq_set_policy() at any point, but the deadlock situation described above must be avoided too. To that end, use the observation that in principle governor tunables may be represented by the same data type regardless of whether the governor is system-wide or per-policy and introduce a new structure, struct governor_attr, for representing them and new corresponding macros for creating show/store sysfs callbacks for them. Also make their parent kobject use a new kobject type whose default show/store callbacks are not related to the standard core cpufreq ones in any way (and they don't acquire policy->rwsem in particular). Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Subject & changelog + rebase ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 68 +++++++++----------------- drivers/cpufreq/cpufreq_governor.c | 64 +++++++++++++++++++++--- drivers/cpufreq/cpufreq_governor.h | 39 ++++++++++++++- drivers/cpufreq/cpufreq_ondemand.c | 68 +++++++++----------------- 4 files changed, 140 insertions(+), 99 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4f640b028c94..ed081dbce00c 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -235,54 +235,33 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(cs, down_threshold); -show_store_one(cs, freq_step); -show_store_one_common(cs, sampling_rate); -show_store_one_common(cs, sampling_down_factor); -show_store_one_common(cs, up_threshold); -show_store_one_common(cs, ignore_nice_load); -show_one_common(cs, min_sampling_rate); +gov_show_one_common(sampling_rate); +gov_show_one_common(sampling_down_factor); +gov_show_one_common(up_threshold); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one(cs, down_threshold); +gov_show_one(cs, freq_step); -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(down_threshold); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(freq_step); -gov_sys_pol_attr_ro(min_sampling_rate); +gov_attr_rw(sampling_rate); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(up_threshold); +gov_attr_rw(ignore_nice_load); +gov_attr_ro(min_sampling_rate); +gov_attr_rw(down_threshold); +gov_attr_rw(freq_step); -static struct attribute *dbs_attributes_gov_sys[] = { - &min_sampling_rate_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &up_threshold_gov_sys.attr, - &down_threshold_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &freq_step_gov_sys.attr, +static struct 
attribute *cs_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &up_threshold.attr, + &down_threshold.attr, + &ignore_nice_load.attr, + &freq_step.attr, NULL }; -static struct attribute_group cs_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "conservative", -}; - -static struct attribute *dbs_attributes_gov_pol[] = { - &min_sampling_rate_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &up_threshold_gov_pol.attr, - &down_threshold_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &freq_step_gov_pol.attr, - NULL -}; - -static struct attribute_group cs_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "conservative", -}; - /************************** sysfs end ************************/ static int cs_init(struct dbs_data *dbs_data, bool notify) @@ -331,8 +310,7 @@ static struct dbs_governor cs_dbs_gov = { .owner = THIS_MODULE, }, .governor = GOV_CONSERVATIVE, - .attr_group_gov_sys = &cs_attr_group_gov_sys, - .attr_group_gov_pol = &cs_attr_group_gov_pol, + .kobj_type = { .default_attrs = cs_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 3569782771ef..00cb468d3b6a 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -25,12 +25,58 @@ DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); -static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov) +static inline struct dbs_data *to_dbs_data(struct kobject *kobj) { - return have_governor_per_policy() ? 
- gov->attr_group_gov_pol : gov->attr_group_gov_sys; + return container_of(kobj, struct dbs_data, kobj); } +static inline struct governor_attr *to_gov_attr(struct attribute *attr) +{ + return container_of(attr, struct governor_attr, attr); +} + +static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret = -EIO; + + if (gattr->show) + ret = gattr->show(dbs_data, buf); + + return ret; +} + +static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret = -EIO; + + mutex_lock(&dbs_data->mutex); + + if (gattr->store) + ret = gattr->store(dbs_data, buf, count); + + mutex_unlock(&dbs_data->mutex); + + return ret; +} + +/* + * Sysfs Ops for accessing governor attributes. + * + * All show/store invocations for governor specific sysfs attributes, will first + * call the below show/store callbacks and the attribute specific callback will + * be called from within it. 
+ */ +static const struct sysfs_ops governor_sysfs_ops = { + .show = governor_show, + .store = governor_store, +}; + void dbs_check_cpu(struct cpufreq_policy *policy) { int cpu = policy->cpu; @@ -352,6 +398,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) } dbs_data->usage_count = 1; + mutex_init(&dbs_data->mutex); ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) @@ -374,12 +421,15 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; - ret = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr(gov)); + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; + ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, + get_governor_parent_kobj(policy), + "%s", gov->gov.name); if (!ret) return 0; /* Failure, so roll back. */ + pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); policy->governor_data = NULL; @@ -404,8 +454,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) return -EBUSY; if (!--dbs_data->usage_count) { - sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr(gov)); + kobject_put(&dbs_data->kobj); policy->governor_data = NULL; @@ -413,6 +462,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) gov->gdbs_data = NULL; gov->exit(dbs_data, policy->governor->initialized == 1); + mutex_destroy(&dbs_data->mutex); kfree(dbs_data); } else { policy->governor_data = NULL; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index e296362d21d2..bdb6e4940b75 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -160,8 +160,44 @@ struct dbs_data { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; + + struct kobject kobj; + /* Protect concurrent updates to governor tunables from sysfs */ + struct mutex mutex; }; +/* Governor's specific 
attributes */ +struct dbs_data; +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct dbs_data *dbs_data, char *buf); + ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, + size_t count); +}; + +#define gov_show_one(_gov, file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ +{ \ + struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} + +#define gov_show_one_common(file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ +} + +#define gov_attr_ro(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define gov_attr_rw(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + /* Common to all CPUs of a policy */ struct policy_dbs_info { struct cpufreq_policy *policy; @@ -236,8 +272,7 @@ struct dbs_governor { #define GOV_ONDEMAND 0 #define GOV_CONSERVATIVE 1 int governor; - struct attribute_group *attr_group_gov_sys; /* one governor - system */ - struct attribute_group *attr_group_gov_pol; /* one governor - policy */ + struct kobj_type kobj_type; /* * Common data for platforms that don't set diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 13c64b662fa1..e36792f60348 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -432,54 +432,33 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(od, io_is_busy); -show_store_one(od, powersave_bias); -show_store_one_common(od, sampling_rate); -show_store_one_common(od, up_threshold); -show_store_one_common(od, sampling_down_factor); -show_store_one_common(od, ignore_nice_load); -show_one_common(od, min_sampling_rate); +gov_show_one_common(sampling_rate); +gov_show_one_common(up_threshold); 
+gov_show_one_common(sampling_down_factor); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one(od, io_is_busy); +gov_show_one(od, powersave_bias); -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(io_is_busy); -gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(powersave_bias); -gov_sys_pol_attr_ro(min_sampling_rate); +gov_attr_rw(sampling_rate); +gov_attr_rw(io_is_busy); +gov_attr_rw(up_threshold); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(ignore_nice_load); +gov_attr_rw(powersave_bias); +gov_attr_ro(min_sampling_rate); -static struct attribute *dbs_attributes_gov_sys[] = { - &min_sampling_rate_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &up_threshold_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &powersave_bias_gov_sys.attr, - &io_is_busy_gov_sys.attr, +static struct attribute *od_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + &io_is_busy.attr, NULL }; -static struct attribute_group od_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "ondemand", -}; - -static struct attribute *dbs_attributes_gov_pol[] = { - &min_sampling_rate_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &up_threshold_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &powersave_bias_gov_pol.attr, - &io_is_busy_gov_pol.attr, - NULL -}; - -static struct attribute_group od_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "ondemand", -}; - /************************** sysfs end ************************/ static int od_init(struct dbs_data *dbs_data, bool notify) @@ -544,8 +523,7 @@ static struct dbs_governor od_dbs_gov = { .owner = THIS_MODULE, }, .governor = GOV_ONDEMAND, - .attr_group_gov_sys = 
&od_attr_group_gov_sys, - .attr_group_gov_pol = &od_attr_group_gov_pol, + .kobj_type = { .default_attrs = od_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer, From fd8ddc482a7a5e015c0613c4d96543d5efad047c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:34 +0530 Subject: [PATCH 47/94] cpufreq: governor: Drop unused macros for creating governor tunable attributes The previous commit introduced a new set of macros for creating sysfs attributes that represent governor tunables and the old macros used for this purpose are not needed any more, so drop them. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.h | 89 ------------------------------ 1 file changed, 89 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index bdb6e4940b75..0eb66a6c9503 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -41,95 +41,6 @@ /* Ondemand Sampling types */ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; -/* - * Macro for creating governors sysfs routines - * - * - gov_sys: One governor instance per whole system - * - gov_pol: One governor instance per policy - */ - -/* Create attributes */ -#define gov_sys_attr_ro(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) - -#define gov_sys_attr_rw(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) - -#define gov_pol_attr_ro(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0444, show_##_name##_gov_pol, NULL) - -#define gov_pol_attr_rw(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) - -#define gov_sys_pol_attr_rw(_name) \ - 
gov_sys_attr_rw(_name); \ - gov_pol_attr_rw(_name) - -#define gov_sys_pol_attr_ro(_name) \ - gov_sys_attr_ro(_name); \ - gov_pol_attr_ro(_name) - -/* Create show/store routines */ -#define show_one(_gov, file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct _gov##_dbs_tuners *tuners = _gov##_dbs_gov.gdbs_data->tuners; \ - return sprintf(buf, "%u\n", tuners->file_name); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ - return sprintf(buf, "%u\n", tuners->file_name); \ -} - -#define store_one(_gov, file_name) \ -static ssize_t store_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return store_##file_name(dbs_data, buf, count); \ -} \ - \ -static ssize_t store_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, const char *buf, size_t count) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - return store_##file_name(policy_dbs->dbs_data, buf, count); \ -} - -#define show_store_one(_gov, file_name) \ -show_one(_gov, file_name); \ -store_one(_gov, file_name) - -#define show_one_common(_gov, file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_gov.gdbs_data; \ - return sprintf(buf, "%u\n", dbs_data->file_name); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct policy_dbs_info *policy_dbs = policy->governor_data; \ - struct dbs_data *dbs_data = policy_dbs->dbs_data; \ - return sprintf(buf, "%u\n", dbs_data->file_name); \ -} - -#define 
show_store_one_common(_gov, file_name) \ -show_one_common(_gov, file_name); \ -store_one(_gov, file_name) - /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ From 68e80dae09033d778b98dc88e5bfe8fdade188e5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:35 +0530 Subject: [PATCH 48/94] Revert "cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT" Earlier, when the struct freq_attr was used to represent governor attributes, the standard cpufreq show/store sysfs attribute callbacks were applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That could have resulted in an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. The previous commit, "cpufreq: governor: New sysfs show/store callbacks for governor tunables", fixed the original ABBA deadlock by adding new governor specific show/store callbacks. We don't have to drop rwsem around invocations of governor event CPUFREQ_GOV_POLICY_EXIT anymore, and the original fix can be reverted now. Fixes: 955ef4833574 (cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT) Signed-off-by: Viresh Kumar Reported-by: Juri Lelli Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/cpufreq.c | 5 ----- include/linux/cpufreq.h | 4 ---- 2 files changed, 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e172b2a02c1d..e92e9eab7c6c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2205,10 +2205,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - up_write(&policy->rwsem); ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); - if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2224,9 +2221,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (!ret) goto out; - up_write(&policy->rwsem); __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); } /* new governor failed, so re-start old one */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 704d85bf7242..cac3d1ba8200 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -100,10 +100,6 @@ struct cpufreq_policy { * - Any routine that will write to the policy structure and/or may take away * the policy altogether (eg. CPU hotplug), will hold this lock in write * mode before doing so. - * - * Additional rules: - * - Lock should not be held across - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); */ struct rw_semaphore rwsem; From c54df0718423ea2941151d8516eb76ca6a32a4b4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Feb 2016 11:00:25 +0530 Subject: [PATCH 49/94] cpufreq: governor: Create and traverse list of policy_dbs to avoid deadlock The dbs_data_mutex lock is currently used in two places. First, cpufreq_governor_dbs() uses it to guarantee mutual exclusion between invocations of governor operations from the core. Second, it is used by ondemand governor's update_sampling_rate() to ensure the stability of data structures walked by it. 
The second usage is quite problematic, because update_sampling_rate() is called from a governor sysfs attribute's ->store callback and that leads to a deadlock scenario involving cpufreq_governor_exit() which runs under dbs_data_mutex. Thus it is better to rework the code so update_sampling_rate() doesn't need to acquire dbs_data_mutex. To that end, rework update_sampling_rate() to walk a list of policy_dbs objects supported by the dbs_data one it has been called for (instead of walking cpu_dbs_info objects for all CPUs). Since the list manipulation is protected with dbs_data->mutex, which is also held around the execution of update_sampling_rate(), it is not necessary to hold dbs_data_mutex in that function any more. Reported-by: Juri Lelli Reported-by: Shilpasri G Bhat Signed-off-by: Viresh Kumar [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 22 ++++++-- drivers/cpufreq/cpufreq_governor.h | 7 ++- drivers/cpufreq/cpufreq_ondemand.c | 89 ++++++++++-------------------- 3 files changed, 54 insertions(+), 64 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 00cb468d3b6a..2f35270fbd43 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -385,9 +385,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) ret = -EINVAL; goto free_policy_dbs_info; } - dbs_data->usage_count++; policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; + + mutex_lock(&dbs_data->mutex); + dbs_data->usage_count++; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + mutex_unlock(&dbs_data->mutex); + return 0; } @@ -397,7 +402,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; } - dbs_data->usage_count = 1; + INIT_LIST_HEAD(&dbs_data->policy_dbs_list); mutex_init(&dbs_data->mutex); ret = gov->init(dbs_data, !policy->governor->initialized); @@ -418,9 +423,12 @@ static int
cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; - policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; + policy_dbs->dbs_data = dbs_data; + dbs_data->usage_count = 1; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, get_governor_parent_kobj(policy), @@ -448,12 +456,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; + int count; /* State should be equivalent to INIT */ if (policy_dbs->policy) return -EBUSY; - if (!--dbs_data->usage_count) { + mutex_lock(&dbs_data->mutex); + list_del(&policy_dbs->list); + count = --dbs_data->usage_count; + mutex_unlock(&dbs_data->mutex); + + if (!count) { kobject_put(&dbs_data->kobj); policy->governor_data = NULL; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 0eb66a6c9503..8bf4775ce03c 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -73,7 +73,11 @@ struct dbs_data { unsigned int up_threshold; struct kobject kobj; - /* Protect concurrent updates to governor tunables from sysfs */ + struct list_head policy_dbs_list; + /* + * Protect concurrent updates to governor tunables from sysfs, + * policy_dbs_list and usage_count. 
+ */ struct mutex mutex; }; @@ -125,6 +129,7 @@ struct policy_dbs_info { struct work_struct work; /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; + struct list_head list; }; static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index e36792f60348..38301c6b31c7 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -226,84 +226,55 @@ static struct dbs_governor od_dbs_gov; * @new_rate: new sampling rate * * If new rate is smaller than the old, simply updating - * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the + * dbs.sampling_rate might not be appropriate. For example, if the * original sampling_rate was 1 second and the requested new sampling rate is 10 * ms because the user needs immediate reaction from ondemand governor, but not * sure if higher frequency will be required or not, then, the governor may * change the sampling rate too late; up to 1 second later. Thus, if we are * reducing the sampling rate, we need to make the new value effective * immediately. + * + * On the other hand, if new rate is larger than the old, then we may evaluate + * the load too soon, and it might be worth updating sample_delay_ns then as + * well. + * + * This must be called with dbs_data->mutex held, otherwise traversing + * policy_dbs_list isn't safe. */ static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { - struct cpumask cpumask; - int cpu; + struct policy_dbs_info *policy_dbs; dbs_data->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate); /* - * Lock governor so that governor start/stop can't execute in parallel. + * We are operating under dbs_data->mutex and so the list and its + * entries can't be freed concurrently.
*/ - mutex_lock(&dbs_data_mutex); - - cpumask_copy(&cpumask, cpu_online_mask); - - for_each_cpu(cpu, &cpumask) { - struct cpufreq_policy *policy; - struct od_cpu_dbs_info_s *dbs_info; - struct cpu_dbs_info *cdbs; - struct policy_dbs_info *policy_dbs; - - dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - cdbs = &dbs_info->cdbs; - policy_dbs = cdbs->policy_dbs; - + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + mutex_lock(&policy_dbs->timer_mutex); /* - * A valid policy_dbs and policy_dbs->policy means governor - * hasn't stopped or exited yet. + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), but that + * really doesn't matter. If the read returns a value that's + * too big, the sample will be skipped, but the next invocation + * of dbs_update_util_handler() (when the update has been + * completed) will take a sample. If the returned value is too + * small, the sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect immediately + * anyway. + * + * If this runs in parallel with dbs_work_handler(), we may end + * up overwriting the sample_delay_ns value that it has just + * written, but the difference should not be too big and it will + * be corrected next time a sample is taken, so it shouldn't be + * significant. */ - if (!policy_dbs || !policy_dbs->policy) - continue; - - policy = policy_dbs->policy; - - /* clear all CPUs of this policy */ - cpumask_andnot(&cpumask, &cpumask, policy->cpus); - - /* - * Update sampling rate for CPUs whose policy is governed by - * dbs_data. In case of governor_per_policy, only a single - * policy will be governed by dbs_data, otherwise there can be - * multiple policies that are governed by the same dbs_data. 
- */ - if (dbs_data == policy_dbs->dbs_data) { - mutex_lock(&policy_dbs->timer_mutex); - /* - * On 32-bit architectures this may race with the - * sample_delay_ns read in dbs_update_util_handler(), - * but that really doesn't matter. If the read returns - * a value that's too big, the sample will be skipped, - * but the next invocation of dbs_update_util_handler() - * (when the update has been completed) will take a - * sample. If the returned value is too small, the - * sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect - * immediately anyway. - * - * If this runs in parallel with dbs_work_handler(), we - * may end up overwriting the sample_delay_ns value that - * it has just written, but the difference should not be - * too big and it will be corrected next time a sample - * is taken, so it shouldn't be significant. - */ - gov_update_sample_delay(policy_dbs, new_rate); - mutex_unlock(&policy_dbs->timer_mutex); - } + gov_update_sample_delay(policy_dbs, new_rate); + mutex_unlock(&policy_dbs->timer_mutex); } - - mutex_unlock(&dbs_data_mutex); } static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, From 69cee7147b4a4ea02085d571cd2d9974d4a4d8d5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:11 +0530 Subject: [PATCH 50/94] cpufreq: Merge cpufreq_offline_prepare/finish routines Commit 1aee40ac9c86 (cpufreq: Invoke __cpufreq_remove_dev_finish() after releasing cpu_hotplug.lock) split the cpufreq's CPU offline routine in two pieces, one of them to be run with CPU offline/online locked and the other to be called later. The reason for that split was a possible deadlock scenario involving cpufreq sysfs attributes and CPU offline. However, the handling of CPU offline in cpufreq has changed since then. Policy sysfs attributes are never removed during CPU offline, so there's no need to worry about accessing them during CPU offline, because that can't lead to any deadlocks now. 
Governor sysfs attributes are still removed in __cpufreq_governor(_EXIT), but there is a new kobject type for them now and its show/store callbacks don't lock CPU offline/online (they don't need to do that). This means that the CPU offline code in cpufreq doesn't need to be split any more, so combine cpufreq_offline_prepare() with cpufreq_offline_finish(). Signed-off-by: Viresh Kumar [ rjw: Changelog ] Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 42 ++++++++++++--------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e92e9eab7c6c..f65553dc48c9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1362,9 +1362,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return ret; } -static void cpufreq_offline_prepare(unsigned int cpu) +static void cpufreq_offline(unsigned int cpu) { struct cpufreq_policy *policy; + int ret; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1375,7 +1376,7 @@ static void cpufreq_offline_prepare(unsigned int cpu) } if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } @@ -1398,34 +1399,23 @@ static void cpufreq_offline_prepare(unsigned int cpu) /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); if (ret) pr_err("%s: Failed to start governor\n", __func__); } - } else if (cpufreq_driver->stop_cpu) { + + return; + } + + if (cpufreq_driver->stop_cpu) cpufreq_driver->stop_cpu(policy); - } -} - -static void cpufreq_offline_finish(unsigned int cpu) -{ - struct 
cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - - if (!policy) { - pr_debug("%s: No cpu_data found\n", __func__); - return; - } - - /* Only proceed for inactive policies */ - if (!policy_is_inactive(policy)) - return; /* If cpu is last user of policy, free policy */ if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1454,10 +1444,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; - if (cpu_online(cpu)) { - cpufreq_offline_prepare(cpu); - cpufreq_offline_finish(cpu); - } + if (cpu_online(cpu)) + cpufreq_offline(cpu); cpumask_clear_cpu(cpu, policy->real_cpus); remove_cpu_dev_symlink(policy, cpu); @@ -2305,11 +2293,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - cpufreq_offline_prepare(cpu); - break; - - case CPU_POST_DEAD: - cpufreq_offline_finish(cpu); + cpufreq_offline(cpu); break; case CPU_DOWN_FAILED: From 49f18560f8bac5315047edfb673dd13d56cbcbc9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:12 +0530 Subject: [PATCH 51/94] cpufreq: Call __cpufreq_governor() with policy->rwsem held The cpufreq core code is not consistent with respect to invoking __cpufreq_governor() under policy->rwsem. Changing all code to always hold policy->rwsem around __cpufreq_governor() invocations will allow us to remove cpufreq_governor_lock that is used today because we can't guarantee that __cpufreq_governor() isn't executed twice in parallel for the same policy. We should also ensure that policy->rwsem is held across governor state changes. For example, while adding a CPU to the policy in the CPU online path, we need to stop the governor, change policy->cpus, start the governor and then refresh its limits. 
The complete sequence must be guaranteed to complete without interruptions by concurrent governor state updates. That can be achieved by holding policy->rwsem around those sequences of operations. Also note that after this patch cpufreq_driver->stop_cpu() and ->exit() will get called under policy->rwsem which wasn't the case earlier. That shouldn't have any side effects, though. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 49 ++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f65553dc48c9..692876892457 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1049,30 +1049,29 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp if (cpumask_test_cpu(cpu, policy->cpus)) return 0; + down_write(&policy->rwsem); if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { pr_err("%s: Failed to stop governor\n", __func__); - return ret; + goto unlock; } } - down_write(&policy->rwsem); cpumask_set_cpu(cpu, policy->cpus); - up_write(&policy->rwsem); if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - if (ret) { + if (ret) pr_err("%s: Failed to start governor\n", __func__); - return ret; - } } - return 0; +unlock: + up_write(&policy->rwsem); + return ret; } static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) @@ -1375,13 +1374,13 @@ static void cpufreq_offline(unsigned int cpu) return; } + down_write(&policy->rwsem); if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } - down_write(&policy->rwsem); cpumask_clear_cpu(cpu, policy->cpus); if (policy_is_inactive(policy)) { @@ -1394,7 +1393,6 @@ static 
void cpufreq_offline(unsigned int cpu) /* Nominate new CPU */ policy->cpu = cpumask_any(policy->cpus); } - up_write(&policy->rwsem); /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { @@ -1407,7 +1405,7 @@ static void cpufreq_offline(unsigned int cpu) pr_err("%s: Failed to start governor\n", __func__); } - return; + goto unlock; } if (cpufreq_driver->stop_cpu) @@ -1429,6 +1427,9 @@ static void cpufreq_offline(unsigned int cpu) cpufreq_driver->exit(policy); policy->freq_table = NULL; } + +unlock: + up_write(&policy->rwsem); } /** @@ -1625,6 +1626,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend); void cpufreq_suspend(void) { struct cpufreq_policy *policy; + int ret; if (!cpufreq_driver) return; @@ -1635,7 +1637,11 @@ void cpufreq_suspend(void) pr_debug("%s: Suspending Governors\n", __func__); for_each_active_policy(policy) { - if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) + down_write(&policy->rwsem); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + up_write(&policy->rwsem); + + if (ret) pr_err("%s: Failed to stop governor for policy: %p\n", __func__, policy); else if (cpufreq_driver->suspend @@ -1657,6 +1663,7 @@ suspend: void cpufreq_resume(void) { struct cpufreq_policy *policy; + int ret; if (!cpufreq_driver) return; @@ -1669,13 +1676,20 @@ void cpufreq_resume(void) pr_debug("%s: Resuming Governors\n", __func__); for_each_active_policy(policy) { - if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) + if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) { pr_err("%s: Failed to resume driver: %p\n", __func__, policy); - else if (__cpufreq_governor(policy, CPUFREQ_GOV_START) - || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) - pr_err("%s: Failed to start governor for policy: %p\n", - __func__, policy); + } else { + down_write(&policy->rwsem); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (!ret) + __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + up_write(&policy->rwsem); + + if (ret) + pr_err("%s: 
Failed to start governor for policy: %p\n", + __func__, policy); + } } /* @@ -2326,8 +2340,11 @@ static int cpufreq_boost_set_sw(int state) __func__); break; } + + down_write(&policy->rwsem); policy->user_policy.max = policy->max; __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + up_write(&policy->rwsem); } } From 99522fe6788f5bf627dce7c20ed9484c933511a3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:13 +0530 Subject: [PATCH 52/94] cpufreq: Remove cpufreq_governor_lock We used to drop policy->rwsem just before calling __cpufreq_governor() in some cases earlier and so it was possible that __cpufreq_governor() ran concurrently via separate threads for the same policy. In order to guarantee valid state transitions for governors, 'governor_enabled' was required to be protected using some locking and cpufreq_governor_lock was added for that. But now __cpufreq_governor() is always called under policy->rwsem, and 'governor_enabled' is protected against races even without cpufreq_governor_lock. Get rid of the extra lock now. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw : Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 8 -------- drivers/cpufreq/cpufreq_governor.h | 1 - 2 files changed, 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 692876892457..bc93272b4a12 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -147,8 +147,6 @@ void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) rcu_read_unlock(); } -DEFINE_MUTEX(cpufreq_governor_lock); - /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; @@ -2015,11 +2013,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); - mutex_lock(&cpufreq_governor_lock); if ((policy->governor_enabled && event == CPUFREQ_GOV_START) || (!policy->governor_enabled && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { - mutex_unlock(&cpufreq_governor_lock); return -EBUSY; } @@ -2028,8 +2024,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, else if (event == CPUFREQ_GOV_START) policy->governor_enabled = true; - mutex_unlock(&cpufreq_governor_lock); - ret = policy->governor->governor(policy, event); if (!ret) { @@ -2039,12 +2033,10 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->governor->initialized--; } else { /* Restore original values */ - mutex_lock(&cpufreq_governor_lock); if (event == CPUFREQ_GOV_STOP) policy->governor_enabled = true; else if (event == CPUFREQ_GOV_START) policy->governor_enabled = false; - mutex_unlock(&cpufreq_governor_lock); } if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8bf4775ce03c..e9ec411042c3 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -232,7 +232,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) } extern struct mutex dbs_data_mutex; -extern struct mutex cpufreq_governor_lock; void 
dbs_check_cpu(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) From 581c214b21e4faba06d913952e38e80635d9ada5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:14 +0530 Subject: [PATCH 53/94] cpufreq: governor: No need to manage state machine now The cpufreq core now guarantees that policy->rwsem won't be dropped while running the ->governor callback for the CPUFREQ_GOV_POLICY_EXIT event and will be held acquired until the complete sequence of governor state changes has finished. This allows governor state machine checks to be dropped from multiple functions in cpufreq_governor.c. This also means that policy_dbs->policy can be initialized upfront, so the entire initialization of struct policy_dbs can be carried out in one place. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 2f35270fbd43..a34de9d10cbc 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -233,8 +233,10 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) synchronize_rcu(); } -static void gov_cancel_work(struct policy_dbs_info *policy_dbs) +static void gov_cancel_work(struct cpufreq_policy *policy) { + struct policy_dbs_info *policy_dbs = policy->governor_data; + /* Tell dbs_update_util_handler() to skip queuing up work items. 
*/ atomic_inc(&policy_dbs->work_count); /* @@ -331,6 +333,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli if (!policy_dbs) return NULL; + policy_dbs->policy = policy; mutex_init(&policy_dbs->timer_mutex); atomic_set(&policy_dbs->work_count, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); @@ -458,10 +461,6 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy_dbs->dbs_data; int count; - /* State should be equivalent to INIT */ - if (policy_dbs->policy) - return -EBUSY; - mutex_lock(&dbs_data->mutex); list_del(&policy_dbs->list); count = --dbs_data->usage_count; @@ -497,10 +496,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (!policy->cur) return -EINVAL; - /* State should be equivalent to INIT */ - if (policy_dbs->policy) - return -EBUSY; - sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; @@ -525,7 +520,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } - policy_dbs->policy = policy; if (gov->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -548,14 +542,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { - struct policy_dbs_info *policy_dbs = policy->governor_data; - - /* State should be equivalent to START */ - if (!policy_dbs->policy) - return -EBUSY; - - gov_cancel_work(policy_dbs); - policy_dbs->policy = NULL; + gov_cancel_work(policy); return 0; } @@ -564,10 +551,6 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; - /* State should be equivalent to START */ - if (!policy_dbs->policy) - return -EBUSY; - mutex_lock(&policy_dbs->timer_mutex); if (policy->max < policy->cur) __cpufreq_driver_target(policy, policy->max, 
CPUFREQ_RELATION_H); From aded387b94b69aeab10e1d112bab7f82c9241527 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:15 +0530 Subject: [PATCH 54/94] cpufreq: conservative: Update sample_delay_ns immediately The ondemand governor already updates sample_delay_ns immediately on updates to the sampling rate, but conservative doesn't do that. It was left out earlier as the code was really too complex to get that done easily. Things are sorted out very well now, however, and the conservative governor can be modified to follow ondemand in that respect. Moreover, since the code needed to implement that in the conservative governor would be identical to the corresponding ondemand governor's code, make that code common and change both governors to use it. Signed-off-by: Viresh Kumar Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 14 ------ drivers/cpufreq/cpufreq_governor.c | 63 +++++++++++++++++++++++ drivers/cpufreq/cpufreq_governor.h | 2 + drivers/cpufreq/cpufreq_ondemand.c | 69 -------------------------- 4 files changed, 65 insertions(+), 83 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index ed081dbce00c..6243502ce24d 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -136,20 +136,6 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, return count; } -static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, - size_t count) -{ - unsigned int input; - int ret; - ret = sscanf(buf, "%u", &input); - - if (ret != 1) - return -EINVAL; - - dbs_data->sampling_rate = max(input, dbs_data->min_sampling_rate); - return count; -} - static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, size_t count) { diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 
a34de9d10cbc..d41db19a9bb7 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -25,6 +25,69 @@ DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); +/* Common sysfs tunables */ +/** + * store_sampling_rate - update sampling rate effective immediately if needed. + * + * If new rate is smaller than the old, simply updating + * dbs.sampling_rate might not be appropriate. For example, if the + * original sampling_rate was 1 second and the requested new sampling rate is 10 + * ms because the user needs immediate reaction from ondemand governor, but not + * sure if higher frequency will be required or not, then, the governor may + * change the sampling rate too late; up to 1 second later. Thus, if we are + * reducing the sampling rate, we need to make the new value effective + * immediately. + * + * On the other hand, if new rate is larger than the old, then we may evaluate + * the load too soon, and it might be worth updating sample_delay_ns then as + * well. + * + * This must be called with dbs_data->mutex held, otherwise traversing + * policy_dbs_list isn't safe. + */ +ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct policy_dbs_info *policy_dbs; + unsigned int rate; + int ret; + ret = sscanf(buf, "%u", &rate); + if (ret != 1) + return -EINVAL; + + dbs_data->sampling_rate = max(rate, dbs_data->min_sampling_rate); + + /* + * We are operating under dbs_data->mutex and so the list and its + * entries can't be freed concurrently. + */ + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + mutex_lock(&policy_dbs->timer_mutex); + /* + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), but that + * really doesn't matter.
If the read returns a value that's + * too big, the sample will be skipped, but the next invocation + * of dbs_update_util_handler() (when the update has been + * completed) will take a sample. If the returned value is too + * small, the sample will be taken immediately, but that isn't a + * problem, as we want the new rate to take effect immediately + * anyway. + * + * If this runs in parallel with dbs_work_handler(), we may end + * up overwriting the sample_delay_ns value that it has just + * written, but the difference should not be too big and it will + * be corrected next time a sample is taken, so it shouldn't be + * significant. + */ + gov_update_sample_delay(policy_dbs, dbs_data->sampling_rate); + mutex_unlock(&policy_dbs->timer_mutex); + } + + return count; +} +EXPORT_SYMBOL_GPL(store_sampling_rate); + static inline struct dbs_data *to_dbs_data(struct kobject *kobj) { return container_of(kobj, struct dbs_data, kobj); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index e9ec411042c3..8138eff5e25b 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -238,4 +238,6 @@ void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); void od_unregister_powersave_bias_handler(void); +ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 38301c6b31c7..12213823cc93 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -221,75 +221,6 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) /************************** sysfs interface ************************/ static struct dbs_governor od_dbs_gov; -/** - * update_sampling_rate - update sampling rate effective immediately if needed. 
- * @new_rate: new sampling rate - * - * If new rate is smaller than the old, simply updating - * dbs.sampling_rate might not be appropriate. For example, if the - * original sampling_rate was 1 second and the requested new sampling rate is 10 - * ms because the user needs immediate reaction from ondemand governor, but not - * sure if higher frequency will be required or not, then, the governor may - * change the sampling rate too late; up to 1 second later. Thus, if we are - * reducing the sampling rate, we need to make the new value effective - * immediately. - * - * On the other hand, if new rate is larger than the old, then we may evaluate - * the load too soon, and it might we worth updating sample_delay_ns then as - * well. - * - * This must be called with dbs_data->mutex held, otherwise traversing - * policy_dbs_list isn't safe. - */ -static void update_sampling_rate(struct dbs_data *dbs_data, - unsigned int new_rate) -{ - struct policy_dbs_info *policy_dbs; - - dbs_data->sampling_rate = new_rate = max(new_rate, - dbs_data->min_sampling_rate); - - /* - * We are operating under dbs_data->mutex and so the list and its - * entries can't be freed concurrently. - */ - list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { - mutex_lock(&policy_dbs->timer_mutex); - /* - * On 32-bit architectures this may race with the - * sample_delay_ns read in dbs_update_util_handler(), but that - * really doesn't matter. If the read returns a value that's - * too big, the sample will be skipped, but the next invocation - * of dbs_update_util_handler() (when the update has been - * completed) will take a sample. If the returned value is too - * small, the sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect immediately - * anyway. 
- * - * If this runs in parallel with dbs_work_handler(), we may end - * up overwriting the sample_delay_ns value that it has just - * written, but the difference should not be too big and it will - * be corrected next time a sample is taken, so it shouldn't be - * significant. - */ - gov_update_sample_delay(policy_dbs, new_rate); - mutex_unlock(&policy_dbs->timer_mutex); - } -} - -static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, - size_t count) -{ - unsigned int input; - int ret; - ret = sscanf(buf, "%u", &input); - if (ret != 1) - return -EINVAL; - - update_sampling_rate(dbs_data, input); - return count; -} - static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, size_t count) { From a23d6d180914dd91e320283c81e4f84f028e24f4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Feb 2016 17:31:16 +0530 Subject: [PATCH 55/94] cpufreq: ondemand: Rearrange od_dbs_timer() to avoid updating delay Avoid extra checks in od_dbs_timer() by rearranging updates to the local delay variable in it. Signed-off-by: Viresh Kumar [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_ondemand.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 12213823cc93..0b79f1488be4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -194,7 +194,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - int delay = 0, sample_type = dbs_info->sample_type; + int delay, sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -208,13 +208,12 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; delay = dbs_info->freq_hi_jiffies; + } else { + delay = delay_for_sampling_rate(dbs_data->sampling_rate + * dbs_info->rate_mult); } } - if (!delay) - delay = delay_for_sampling_rate(dbs_data->sampling_rate - * dbs_info->rate_mult); - return delay; } From b9db42730aeb23f91d7585786de25a260ab04098 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 22:15:34 +0100 Subject: [PATCH 56/94] cpufreq: governor: Avoid irq_work_queue_on() crash on non-SMP ARM As it turns out, irq_work_queue_on() will crash if invoked on non-SMP ARM platforms, but in fact it is not necessary to use that function in the cpufreq governor code (as it doesn't matter to that code which CPU will handle the irq_work), so change it to always use irq_work_queue(). Fixes: 8fb47ff100af (cpufreq: governor: Replace timers with utilization update callbacks) Reported-and-tested-by: Guenter Roeck Reported-and-tested-by: Tony Lindgren Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_governor.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index d41db19a9bb7..580b692d6df4 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -350,15 +350,6 @@ static void dbs_irq_work(struct irq_work *irq_work) schedule_work(&policy_dbs->work); } -static inline void gov_queue_irq_work(struct policy_dbs_info *policy_dbs) -{ -#ifdef CONFIG_SMP - irq_work_queue_on(&policy_dbs->irq_work, smp_processor_id()); -#else - irq_work_queue(&policy_dbs->irq_work); -#endif -} - static void dbs_update_util_handler(struct update_util_data *data, u64 time, unsigned long util, unsigned long max) { @@ -378,7 +369,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, delta_ns = time - policy_dbs->last_sample_time; if ((s64)delta_ns >= policy_dbs->sample_delay_ns) { policy_dbs->last_sample_time = time; - gov_queue_irq_work(policy_dbs); + irq_work_queue(&policy_dbs->irq_work); return; } } From f62b93740c30d0a3f50258d45415f00b763dd70a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:12:56 +0100 Subject: [PATCH 57/94] cpufreq: governor: Simplify gov_cancel_work() slightly The atomic work counter incrementation in gov_cancel_work() is not necessary any more, because work items won't be queued up after gov_clear_update_util() anyway, so drop it along with the comment about how it may be missed by the gov_clear_update_util(). Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 580b692d6df4..c78af11a51f0 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -300,13 +300,6 @@ static void gov_cancel_work(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; - /* Tell dbs_update_util_handler() to skip queuing up work items. */ - atomic_inc(&policy_dbs->work_count); - /* - * If dbs_update_util_handler() is already running, it may not notice - * the incremented work_count, so wait for it to complete to prevent its - * work item from being queued up after the cancel_work_sync() below. - */ gov_clear_update_util(policy_dbs->policy); irq_work_sync(&policy_dbs->irq_work); cancel_work_sync(&policy_dbs->work); @@ -360,7 +353,6 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * The work may not be allowed to be queued up right now. * Possible reasons: * - Work has already been queued up or is in progress. - * - The governor is being stopped. * - It is too early (too little time from the previous sample). */ if (atomic_inc_return(&policy_dbs->work_count) == 1) { From e4db2813d2e558b6b6bee464308678a57732b390 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:13:42 +0100 Subject: [PATCH 58/94] cpufreq: governor: Avoid atomic operations in hot paths Rework the handling of work items by dbs_update_util_handler() and dbs_work_handler() so the former (which is executed in scheduler paths) only uses atomic operations when absolutely necessary. That is, when the policy is shared and dbs_update_util_handler() has already decided that this is the time to queue up a work item. In particular, this avoids the atomic ops entirely on platforms where policy objects are never shared. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 49 +++++++++++++++++++++--------- drivers/cpufreq/cpufreq_governor.h | 3 ++ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index c78af11a51f0..e5a08a13ca84 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -304,6 +304,7 @@ static void gov_cancel_work(struct cpufreq_policy *policy) irq_work_sync(&policy_dbs->irq_work); cancel_work_sync(&policy_dbs->work); atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; } static void dbs_work_handler(struct work_struct *work) @@ -326,13 +327,15 @@ static void dbs_work_handler(struct work_struct *work) policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay); mutex_unlock(&policy_dbs->timer_mutex); + /* Allow the utilization update handler to queue up more work. */ + atomic_set(&policy_dbs->work_count, 0); /* - * If the atomic operation below is reordered with respect to the - * sample delay modification, the utilization update handler may end - * up using a stale sample delay value. + * If the update below is reordered with respect to the sample delay + * modification, the utilization update handler may end up using a stale + * sample delay value. */ - smp_mb__before_atomic(); - atomic_dec(&policy_dbs->work_count); + smp_wmb(); + policy_dbs->work_in_progress = false; } static void dbs_irq_work(struct irq_work *irq_work) @@ -348,6 +351,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; + u64 delta_ns; /* * The work may not be allowed to be queued up right now. @@ -355,17 +359,30 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * - Work has already been queued up or is in progress. 
* - It is too early (too little time from the previous sample). */ - if (atomic_inc_return(&policy_dbs->work_count) == 1) { - u64 delta_ns; + if (policy_dbs->work_in_progress) + return; - delta_ns = time - policy_dbs->last_sample_time; - if ((s64)delta_ns >= policy_dbs->sample_delay_ns) { - policy_dbs->last_sample_time = time; - irq_work_queue(&policy_dbs->irq_work); - return; - } - } - atomic_dec(&policy_dbs->work_count); + /* + * If the reads below are reordered before the check above, the value + * of sample_delay_ns used in the computation may be stale. + */ + smp_rmb(); + delta_ns = time - policy_dbs->last_sample_time; + if ((s64)delta_ns < policy_dbs->sample_delay_ns) + return; + + /* + * If the policy is not shared, the irq_work may be queued up right away + * at this point. Otherwise, we need to ensure that only one of the + * CPUs sharing the policy will do that. + */ + if (policy_dbs->is_shared && + !atomic_add_unless(&policy_dbs->work_count, 1, 1)) + return; + + policy_dbs->last_sample_time = time; + policy_dbs->work_in_progress = true; + irq_work_queue(&policy_dbs->irq_work); } static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, @@ -542,6 +559,8 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) if (!policy->cur) return -EINVAL; + policy_dbs->is_shared = policy_is_shared(policy); + sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8138eff5e25b..521daac38ba5 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -130,6 +130,9 @@ struct policy_dbs_info { /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; struct list_head list; + /* Status indicators */ + bool is_shared; /* This object is used by multiple CPUs */ + bool work_in_progress; /* Work is being queued up or in progress */ }; static inline void 
gov_update_sample_delay(struct policy_dbs_info *policy_dbs, From 679b8fe43a6b723787cae1d9599ed776d7ce238b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:15:50 +0100 Subject: [PATCH 59/94] cpufreq: governor: Fix nice contribution computation in dbs_check_cpu() The contribution of the CPU nice time to the idle time in dbs_check_cpu() is computed in a bogus way, as the code may subtract current and previous nice values for different CPUs. That doesn't matter for cases when cpufreq policies are not shared, but may lead to problems otherwise. Fix the computation and simplify it to avoid taking unnecessary steps. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e5a08a13ca84..c5469701a3ef 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -198,22 +198,10 @@ void dbs_check_cpu(struct cpufreq_policy *policy) j_cdbs->prev_cpu_idle = cur_idle_time; if (ignore_nice) { - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); - u64 cur_nice; - unsigned long cur_nice_jiffies; + u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - - cdbs->prev_cpu_nice; - /* - * Assumption: nice time between sampling periods will - * be less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); - - cdbs->prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - idle_time += jiffies_to_usecs(cur_nice_jiffies); + idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice); + j_cdbs->prev_cpu_nice = cur_nice; } if (unlikely(!wall_time || wall_time < idle_time)) From 57eb832f90e645dcb97d651ad052c0537cc1b3a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 16 Feb 2016 00:58:47 +0100 Subject: [PATCH 60/94] cpufreq: governor: Clean up load-related computations Clean up some load-related computations in dbs_check_cpu() and cpufreq_governor_start() to get rid of unnecessary operations and type casts and make the code easier to read. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index c5469701a3ef..1f580cb62902 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -186,16 +186,15 @@ void dbs_check_cpu(struct cpufreq_policy *policy) io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); - wall_time = (unsigned int) - (cur_wall_time - j_cdbs->prev_cpu_wall); + wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; j_cdbs->prev_cpu_wall = cur_wall_time; - if (cur_idle_time < j_cdbs->prev_cpu_idle) - cur_idle_time = j_cdbs->prev_cpu_idle; - - idle_time = (unsigned int) - (cur_idle_time - j_cdbs->prev_cpu_idle); - j_cdbs->prev_cpu_idle = cur_idle_time; + if (cur_idle_time <= j_cdbs->prev_cpu_idle) { + idle_time = 0; + } else { + idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + j_cdbs->prev_cpu_idle = cur_idle_time; + } if (ignore_nice) { u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -562,13 +561,10 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); unsigned int prev_load; - j_cdbs->prev_cpu_idle = - get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); - prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - - j_cdbs->prev_cpu_idle); - j_cdbs->prev_load = 100 * prev_load / - (unsigned int)j_cdbs->prev_cpu_wall; + prev_load = 
j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle; + j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall; if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; From 4cccf7555770b787fa80791a1407a27301f03920 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:19:31 +0100 Subject: [PATCH 61/94] cpufreq: governor: Get rid of the ->gov_check_cpu callback The way the ->gov_check_cpu governor callback is used by the ondemand and conservative governors is not really straightforward. Namely, the governor calls dbs_check_cpu() that updates the load information for the policy and then invokes ->gov_check_cpu() for the governor. To get rid of that entanglement, notice that cpufreq_governor_limits() doesn't need to call dbs_check_cpu() directly. Instead, it can simply reset the sample delay to 0 which will cause a sample to be taken immediately. The result of that is practically equivalent to calling dbs_check_cpu() except that it will trigger a full update of governor internal state and not just the ->gov_check_cpu() part. Following that observation, make cpufreq_governor_limits() reset the sample delay and turn dbs_check_cpu() into a function, called dbs_update(), that will simply evaluate the load and return the result. That function can now be called by governors from the routines that previously were pointed to by ->gov_check_cpu and those routines can be called directly by each governor instead of dbs_check_cpu(). This way ->gov_check_cpu becomes unnecessary, so drop it. Signed-off-by: Rafael J.
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 26 +++++++++----------------- drivers/cpufreq/cpufreq_governor.c | 15 ++++++++------- drivers/cpufreq/cpufreq_governor.h | 3 +-- drivers/cpufreq/cpufreq_ondemand.c | 15 +++++++++------ 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 6243502ce24d..2e9040e8640c 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -44,20 +44,20 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, * Any frequency increase takes it to the maximum frequency. Frequency reduction * happens at minimum steps of 5% (default) of maximum frequency */ -static void cs_check_cpu(int cpu, unsigned int load) +static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.policy_dbs->policy; + struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int load = dbs_update(policy); /* * break out if we 'cannot' reduce the speed as the user might * want freq_step to be zero */ if (cs_tuners->freq_step == 0) - return; + goto out; /* Check for frequency increase */ if (load > dbs_data->up_threshold) { @@ -65,7 +65,7 @@ static void cs_check_cpu(int cpu, unsigned int load) /* if we are already at full speed then break out early */ if (dbs_info->requested_freq == policy->max) - return; + goto out; dbs_info->requested_freq += get_freq_target(cs_tuners, policy); @@ -74,12 +74,12 @@ static void cs_check_cpu(int cpu, unsigned int load) __cpufreq_driver_target(policy, dbs_info->requested_freq, CPUFREQ_RELATION_H); - return; + goto out; } /* if 
sampling_down_factor is active break out early */ if (++dbs_info->down_skip < dbs_data->sampling_down_factor) - return; + goto out; dbs_info->down_skip = 0; /* Check for frequency decrease */ @@ -89,7 +89,7 @@ static void cs_check_cpu(int cpu, unsigned int load) * if we cannot reduce the frequency anymore, break out early */ if (policy->cur == policy->min) - return; + goto out; freq_target = get_freq_target(cs_tuners, policy); if (dbs_info->requested_freq > freq_target) @@ -99,16 +99,9 @@ static void cs_check_cpu(int cpu, unsigned int load) __cpufreq_driver_target(policy, dbs_info->requested_freq, CPUFREQ_RELATION_L); - return; } -} -static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) -{ - struct policy_dbs_info *policy_dbs = policy->governor_data; - struct dbs_data *dbs_data = policy_dbs->dbs_data; - - dbs_check_cpu(policy); + out: return delay_for_sampling_rate(dbs_data->sampling_rate); } @@ -300,7 +293,6 @@ static struct dbs_governor cs_dbs_gov = { .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, - .gov_check_cpu = cs_check_cpu, .init = cs_init, .exit = cs_exit, }; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 1f580cb62902..99d25af6485b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -140,9 +140,8 @@ static const struct sysfs_ops governor_sysfs_ops = { .store = governor_store, }; -void dbs_check_cpu(struct cpufreq_policy *policy) +unsigned int dbs_update(struct cpufreq_policy *policy) { - int cpu = policy->cpu; struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; @@ -154,7 +153,7 @@ void dbs_check_cpu(struct cpufreq_policy *policy) if (gov->governor == GOV_ONDEMAND) { struct od_cpu_dbs_info_s *od_dbs_info = - gov->get_cpu_dbs_info_s(cpu); + gov->get_cpu_dbs_info_s(policy->cpu); /* * Sometimes, the 
ondemand governor uses an additional @@ -250,10 +249,9 @@ void dbs_check_cpu(struct cpufreq_policy *policy) if (load > max_load) max_load = load; } - - gov->gov_check_cpu(cpu, max_load); + return max_load; } -EXPORT_SYMBOL_GPL(dbs_check_cpu); +EXPORT_SYMBOL_GPL(dbs_update); void gov_set_update_util(struct policy_dbs_info *policy_dbs, unsigned int delay_us) @@ -601,11 +599,14 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; mutex_lock(&policy_dbs->timer_mutex); + if (policy->max < policy->cur) __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); else if (policy->min > policy->cur) __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(policy); + + gov_update_sample_delay(policy_dbs, 0); + mutex_unlock(&policy_dbs->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 521daac38ba5..38b9512820b0 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -202,7 +202,6 @@ struct dbs_governor { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); - void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); @@ -235,7 +234,7 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) } extern struct mutex dbs_data_mutex; -void dbs_check_cpu(struct cpufreq_policy *policy); +unsigned int dbs_update(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 0b79f1488be4..707c017f4e67 100644 --- 
a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -150,13 +150,13 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) * (default), then we try to increase frequency. Else, we adjust the frequency * proportional to load. */ -static void od_check_cpu(int cpu, unsigned int load) +static void od_update(struct cpufreq_policy *policy) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = dbs_info->cdbs.policy_dbs; - struct cpufreq_policy *policy = policy_dbs->policy; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int load = dbs_update(policy); dbs_info->freq_lo = 0; @@ -198,12 +198,16 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; - if (sample_type == OD_SUB_SAMPLE) { + /* + * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore + * it then. + */ + if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { delay = dbs_info->freq_lo_jiffies; __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { - dbs_check_cpu(policy); + od_update(policy); if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; @@ -428,7 +432,6 @@ static struct dbs_governor od_dbs_gov = { .get_cpu_cdbs = get_cpu_cdbs, .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer, - .gov_check_cpu = od_check_cpu, .gov_ops = &od_ops, .init = od_init, .exit = od_exit, From 78347cdb89065f9d40ea28596ef2bd8058eb6d12 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 15 Feb 2016 02:20:11 +0100 Subject: [PATCH 62/94] cpufreq: governor: Reset sample delay in store_sampling_rate() If store_sampling_rate() updates the sample delay when the ondemand governor is in the middle of its high/low dance (OD_SUB_SAMPLE sample type is set), the governor will still do the bottom half of the previous sample which may take too much time. To prevent that from happening, change store_sampling_rate() to always reset the sample delay to 0 which also is consistent with the new behavior of cpufreq_governor_limits(). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 99d25af6485b..fd4cdc2db238 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -38,10 +38,6 @@ EXPORT_SYMBOL_GPL(dbs_data_mutex); * reducing the sampling rate, we need to make the new value effective * immediately. * - * On the other hand, if new rate is larger than the old, then we may evaluate - * the load too soon, and it might we worth updating sample_delay_ns then as - * well. - * * This must be called with dbs_data->mutex held, otherwise traversing * policy_dbs_list isn't safe. */ @@ -69,18 +65,14 @@ ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, * really doesn't matter. If the read returns a value that's * too big, the sample will be skipped, but the next invocation * of dbs_update_util_handler() (when the update has been - * completed) will take a sample. If the returned value is too - * small, the sample will be taken immediately, but that isn't a - * problem, as we want the new rate to take effect immediately - * anyway. + * completed) will take a sample. 
* * If this runs in parallel with dbs_work_handler(), we may end * up overwriting the sample_delay_ns value that it has just - * written, but the difference should not be too big and it will - * be corrected next time a sample is taken, so it shouldn't be - * significant. + * written, but it will be corrected next time a sample is + * taken, so it shouldn't be significant. */ - gov_update_sample_delay(policy_dbs, dbs_data->sampling_rate); + gov_update_sample_delay(policy_dbs, 0); mutex_unlock(&policy_dbs->timer_mutex); } From 57dc3bcd454eb420ddf25d89852993b61b351327 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:20:51 +0100 Subject: [PATCH 63/94] cpufreq: governor: Move rate_mult to struct policy_dbs The rate_mult field in struct od_cpu_dbs_info_s is used by the code shared with the conservative governor and to access it that code has to do an ugly governor type check. However, first of all it is ever only used for policy->cpu, so it is per-policy rather than per-CPU and second, it is initialized to 1 by cpufreq_governor_start(), so if the conservative governor never modifies it, it will have no effect on the results of any computations. For these reasons, move rate_mult to struct policy_dbs_info (as a common field). Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 25 +++++++++---------------- drivers/cpufreq/cpufreq_governor.h | 3 ++- drivers/cpufreq/cpufreq_ondemand.c | 23 +++++++++++++++-------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index fd4cdc2db238..b002c0d626ea 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -138,24 +138,17 @@ unsigned int dbs_update(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - unsigned int sampling_rate = dbs_data->sampling_rate; unsigned int ignore_nice = dbs_data->ignore_nice_load; unsigned int max_load = 0; - unsigned int j; + unsigned int sampling_rate, j; - if (gov->governor == GOV_ONDEMAND) { - struct od_cpu_dbs_info_s *od_dbs_info = - gov->get_cpu_dbs_info_s(policy->cpu); - - /* - * Sometimes, the ondemand governor uses an additional - * multiplier to give long delays. So apply this multiplier to - * the 'sampling_rate', so as to keep the wake-up-from-idle - * detection logic a bit conservative. - */ - sampling_rate *= od_dbs_info->rate_mult; - - } + /* + * Sometimes governors may use an additional multiplier to increase + * sample delays temporarily. Apply that multiplier to sampling_rate + * so as to keep the wake-up-from-idle detection logic a bit + * conservative. 
+ */ + sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult; /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { @@ -537,6 +530,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) return -EINVAL; policy_dbs->is_shared = policy_is_shared(policy); + policy_dbs->rate_mult = 1; sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; @@ -570,7 +564,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct od_ops *od_ops = gov->gov_ops; struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu); - od_dbs_info->rate_mult = 1; od_dbs_info->sample_type = OD_NORMAL_SAMPLE; od_ops->powersave_bias_init_cpu(cpu); } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 38b9512820b0..f21d1e125cba 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -130,6 +130,8 @@ struct policy_dbs_info { /* dbs_data may be shared between multiple policy objects */ struct dbs_data *dbs_data; struct list_head list; + /* Multiplier for increasing sample delay temporarily. 
*/ + unsigned int rate_mult; /* Status indicators */ bool is_shared; /* This object is used by multiple CPUs */ bool work_in_progress; /* Work is being queued up or in progress */ @@ -163,7 +165,6 @@ struct od_cpu_dbs_info_s { unsigned int freq_lo; unsigned int freq_lo_jiffies; unsigned int freq_hi_jiffies; - unsigned int rate_mult; unsigned int sample_type:1; }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 707c017f4e67..812d9949a0c4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -164,7 +164,7 @@ static void od_update(struct cpufreq_policy *policy) if (load > dbs_data->up_threshold) { /* If switching to max speed, apply sampling_down_factor */ if (policy->cur < policy->max) - dbs_info->rate_mult = dbs_data->sampling_down_factor; + policy_dbs->rate_mult = dbs_data->sampling_down_factor; dbs_freq_increase(policy, policy->max); } else { /* Calculate the next frequency proportional to load */ @@ -175,7 +175,7 @@ static void od_update(struct cpufreq_policy *policy) freq_next = min_f + load * (max_f - min_f) / 100; /* No longer fully busy, reset rate_mult */ - dbs_info->rate_mult = 1; + policy_dbs->rate_mult = 1; if (!od_tuners->powersave_bias) { __cpufreq_driver_target(policy, freq_next, @@ -214,7 +214,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) delay = dbs_info->freq_hi_jiffies; } else { delay = delay_for_sampling_rate(dbs_data->sampling_rate - * dbs_info->rate_mult); + * policy_dbs->rate_mult); } } @@ -266,20 +266,27 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - unsigned int input, j; + struct policy_dbs_info *policy_dbs; + unsigned int input; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; + dbs_data->sampling_down_factor = input; /* Reset down 
sampling multiplier in case it was active */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - j); - dbs_info->rate_mult = 1; + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + /* + * Doing this without locking might lead to using different + * rate_mult values in od_update() and od_dbs_timer(). + */ + mutex_lock(&policy_dbs->timer_mutex); + policy_dbs->rate_mult = 1; + mutex_unlock(&policy_dbs->timer_mutex); } + return count; } From 6e96c5b3ac5181d4b787590e54c4af99d3fa5f2e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:21:35 +0100 Subject: [PATCH 64/94] cpufreq: ondemand: Simplify conditionals in od_dbs_timer() Reduce the indentation level in the conditionals in od_dbs_timer() and drop the delay variable from it. No functional changes. Signed-off-by: Rafael J. Wysocki Reviewed-by: Viresh Kumar --- drivers/cpufreq/cpufreq_ondemand.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 812d9949a0c4..cb5a097c19ea 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -194,7 +194,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - int delay, sample_type = dbs_info->sample_type; + int sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; @@ -203,22 +203,20 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) * it then. 
*/ if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { - delay = dbs_info->freq_lo_jiffies; __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); - } else { - od_update(policy); - if (dbs_info->freq_lo) { - /* Setup timer for SUB_SAMPLE */ - dbs_info->sample_type = OD_SUB_SAMPLE; - delay = dbs_info->freq_hi_jiffies; - } else { - delay = delay_for_sampling_rate(dbs_data->sampling_rate - * policy_dbs->rate_mult); - } + return dbs_info->freq_lo_jiffies; } - return delay; + od_update(policy); + + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = OD_SUB_SAMPLE; + return dbs_info->freq_hi_jiffies; + } + + return delay_for_sampling_rate(dbs_data->sampling_rate * policy_dbs->rate_mult); } /************************** sysfs interface ************************/ From 07aa4402a009bc83194860e7869c491bab854d1c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 02:22:13 +0100 Subject: [PATCH 65/94] cpufreq: governor: Use microseconds in sample delay computations Do not convert microseconds to jiffies and the other way around in governor computations related to the sampling rate and sample delay and drop delay_for_sampling_rate() which isn't of any use then. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 4 +--- drivers/cpufreq/cpufreq_governor.h | 15 ++------------ drivers/cpufreq/cpufreq_ondemand.c | 28 ++++++++++++-------------- 4 files changed, 17 insertions(+), 32 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 2e9040e8640c..4a6f8e1ed72e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -102,7 +102,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) } out: - return delay_for_sampling_rate(dbs_data->sampling_rate); + return dbs_data->sampling_rate; } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b002c0d626ea..56dba71d1788 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -282,7 +282,6 @@ static void dbs_work_handler(struct work_struct *work) struct policy_dbs_info *policy_dbs; struct cpufreq_policy *policy; struct dbs_governor *gov; - unsigned int delay; policy_dbs = container_of(work, struct policy_dbs_info, work); policy = policy_dbs->policy; @@ -293,8 +292,7 @@ static void dbs_work_handler(struct work_struct *work) * ondemand governor isn't updating the sampling rate in parallel. */ mutex_lock(&policy_dbs->timer_mutex); - delay = gov->gov_dbs_timer(policy); - policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay); + gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy)); mutex_unlock(&policy_dbs->timer_mutex); /* Allow the utilization update handler to queue up more work. 
*/ diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index f21d1e125cba..7ae0c71143fa 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -163,8 +163,8 @@ struct od_cpu_dbs_info_s { struct cpu_dbs_info cdbs; struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; - unsigned int freq_lo_jiffies; - unsigned int freq_hi_jiffies; + unsigned int freq_lo_delay_us; + unsigned int freq_hi_delay_us; unsigned int sample_type:1; }; @@ -223,17 +223,6 @@ struct od_ops { void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); }; -static inline int delay_for_sampling_rate(unsigned int sampling_rate) -{ - int delay = usecs_to_jiffies(sampling_rate); - - /* We want all CPUs to do sampling nearly on same jiffy */ - if (num_online_cpus() > 1) - delay -= jiffies % delay; - - return delay; -} - extern struct mutex dbs_data_mutex; unsigned int dbs_update(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index cb5a097c19ea..a3ee74577404 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -66,8 +66,8 @@ static int should_io_be_busy(void) /* * Find right freq to be set now with powersave_bias on. - * Returns the freq_hi to be used right now and will set freq_hi_jiffies, - * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + * Returns the freq_hi to be used right now and will set freq_hi_delay_us, + * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs. 
*/ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation) @@ -75,7 +75,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; unsigned int index = 0; - unsigned int jiffies_total, jiffies_hi, jiffies_lo; + unsigned int delay_hi_us; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -84,7 +84,7 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, if (!dbs_info->freq_table) { dbs_info->freq_lo = 0; - dbs_info->freq_lo_jiffies = 0; + dbs_info->freq_lo_delay_us = 0; return freq_next; } @@ -107,17 +107,15 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, /* Find out how long we have to be in hi and lo freqs */ if (freq_hi == freq_lo) { dbs_info->freq_lo = 0; - dbs_info->freq_lo_jiffies = 0; + dbs_info->freq_lo_delay_us = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(dbs_data->sampling_rate); - jiffies_hi = (freq_avg - freq_lo) * jiffies_total; - jiffies_hi += ((freq_hi - freq_lo) / 2); - jiffies_hi /= (freq_hi - freq_lo); - jiffies_lo = jiffies_total - jiffies_hi; + delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate; + delay_hi_us += (freq_hi - freq_lo) / 2; + delay_hi_us /= freq_hi - freq_lo; + dbs_info->freq_hi_delay_us = delay_hi_us; dbs_info->freq_lo = freq_lo; - dbs_info->freq_lo_jiffies = jiffies_lo; - dbs_info->freq_hi_jiffies = jiffies_hi; + dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us; return freq_hi; } @@ -205,7 +203,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); - return dbs_info->freq_lo_jiffies; + return 
dbs_info->freq_lo_delay_us; } od_update(policy); @@ -213,10 +211,10 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; - return dbs_info->freq_hi_jiffies; + return dbs_info->freq_hi_delay_us; } - return delay_for_sampling_rate(dbs_data->sampling_rate * policy_dbs->rate_mult); + return dbs_data->sampling_rate * policy_dbs->rate_mult; } /************************** sysfs interface ************************/ From a7f35cffb980f3aec75f74559a4320974c845b78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 16 Feb 2016 21:02:24 +0100 Subject: [PATCH 66/94] cpufreq: ondemand: Simplify od_update() slightly Drop some lines of code from od_update() by arranging the statements in there in a more logical way. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_ondemand.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a3ee74577404..34e3a1be9971 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -175,14 +175,11 @@ static void od_update(struct cpufreq_policy *policy) /* No longer fully busy, reset rate_mult */ policy_dbs->rate_mult = 1; - if (!od_tuners->powersave_bias) { - __cpufreq_driver_target(policy, freq_next, - CPUFREQ_RELATION_C); - return; - } + if (od_tuners->powersave_bias) + freq_next = od_ops.powersave_bias_target(policy, + freq_next, + CPUFREQ_RELATION_L); - freq_next = od_ops.powersave_bias_target(policy, freq_next, - CPUFREQ_RELATION_L); __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); } } From 8eb055d3f53e52805907ea54e2eec0885be91a50 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 16 Feb 2016 21:02:32 +0100 Subject: [PATCH 67/94] cpufreq: ondemand: Drop unused callback from struct od_ops The ->freq_increase callback in struct od_ops is never invoked, so drop it. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.h | 1 - drivers/cpufreq/cpufreq_ondemand.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 7ae0c71143fa..675e1cdbb46c 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -220,7 +220,6 @@ struct od_ops { void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation); - void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); }; extern struct mutex dbs_data_mutex; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 34e3a1be9971..375fdcfbc02e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -417,7 +417,6 @@ define_get_cpu_dbs_routines(od_cpu_dbs_info); static struct od_ops od_ops = { .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, .powersave_bias_target = generic_powersave_bias_target, - .freq_increase = dbs_freq_increase, }; static struct dbs_governor od_dbs_gov = { From 574ef14d5dbcd2743326cc1b28e61a1e7733162a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:19:00 +0100 Subject: [PATCH 68/94] cpufreq: governor: Close dbs_data update race condition It is possible for a dbs_data object to be updated after its usage counter has become 0. That may happen if governor_store() runs (via a governor tunable sysfs attribute write) in parallel with cpufreq_governor_exit() called for the last cpufreq policy associated with the dbs_data in question. 
In that case, if governor_store() acquires dbs_data->mutex right after cpufreq_governor_exit() has released it, the ->store() callback invoked by it may operate on dbs_data with no users. Although sysfs will cause the kobject_put() in cpufreq_governor_exit() to block until governor_store() has returned, that situation may lead to some unexpected results, depending on the implementation of the ->store callback, and therefore it should be avoided. To that end, modify governor_store() to check the dbs_data's usage count before invoking the ->store() callback and return an error if it is 0 at that point. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 56dba71d1788..65ed859030ba 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -112,7 +112,7 @@ static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, mutex_lock(&dbs_data->mutex); - if (gattr->store) + if (dbs_data->usage_count && gattr->store) ret = gattr->store(dbs_data, buf, count); mutex_unlock(&dbs_data->mutex); From 8847e038c1d19c20dda0d7a590e31ffa528da8a5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:20:13 +0100 Subject: [PATCH 69/94] cpufreq: governor: Move io_is_busy to struct dbs_data The io_is_busy governor tunable is only used by the ondemand governor and is located in the ondemand-specific data structure, but it is looked at by the common governor code that has to do ugly things to get to that value, so move it to struct dbs_data and modify ondemand accordingly. Since the conservative governor never touches that field, it will be always 0 for that governor and it won't have any effect on the results of computations in that case. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 27 +++++++++------------------ drivers/cpufreq/cpufreq_governor.h | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 12 +++++------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 65ed859030ba..60268160e0ad 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -137,10 +137,9 @@ unsigned int dbs_update(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int ignore_nice = dbs_data->ignore_nice_load; unsigned int max_load = 0; - unsigned int sampling_rate, j; + unsigned int sampling_rate, io_busy, j; /* * Sometimes governors may use an additional multiplier to increase @@ -149,6 +148,12 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * conservative. */ sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult; + /* + * For the purpose of ondemand, waiting for disk IO is an indication + * that you're performance critical, and not that the system is actually + * idle, so do not add the iowait time to the CPU idle time then. + */ + io_busy = dbs_data->io_is_busy; /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { @@ -156,18 +161,9 @@ unsigned int dbs_update(struct cpufreq_policy *policy) u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; - int io_busy = 0; j_cdbs = gov->get_cpu_cdbs(j); - /* - * For the purpose of ondemand, waiting for disk IO is - * an indication that you're performance critical, and - * not that the system is actually idle. So do not add - * the iowait time to the cpu idle time. 
- */ - if (gov->governor == GOV_ONDEMAND) - io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; @@ -522,7 +518,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - int io_busy = 0; + unsigned int io_busy; if (!policy->cur) return -EINVAL; @@ -532,12 +528,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) sampling_rate = dbs_data->sampling_rate; ignore_nice = dbs_data->ignore_nice_load; - - if (gov->governor == GOV_ONDEMAND) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - - io_busy = od_tuners->io_is_busy; - } + io_busy = dbs_data->io_is_busy; for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 675e1cdbb46c..7b3639328066 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -71,6 +71,7 @@ struct dbs_data { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; + unsigned int io_is_busy; struct kobject kobj; struct list_head policy_dbs_list; @@ -177,7 +178,6 @@ struct cs_cpu_dbs_info_s { /* Per policy Governors sysfs tunables */ struct od_dbs_tuners { unsigned int powersave_bias; - unsigned int io_is_busy; }; struct cs_dbs_tuners { diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 375fdcfbc02e..330b5884b99b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -220,7 +220,6 @@ static struct dbs_governor od_dbs_gov; static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; 
unsigned int j; @@ -228,14 +227,14 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - od_tuners->io_is_busy = !!input; + dbs_data->io_is_busy = !!input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); } return count; } @@ -286,7 +285,6 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; @@ -309,7 +307,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, struct od_cpu_dbs_info_s *dbs_info; dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); if (dbs_data->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -342,7 +340,7 @@ gov_show_one_common(up_threshold); gov_show_one_common(sampling_down_factor); gov_show_one_common(ignore_nice_load); gov_show_one_common(min_sampling_rate); -gov_show_one(od, io_is_busy); +gov_show_one_common(io_is_busy); gov_show_one(od, powersave_bias); gov_attr_rw(sampling_rate); @@ -401,7 +399,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; dbs_data->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; - tuners->io_is_busy = should_io_be_busy(); + dbs_data->io_is_busy = should_io_be_busy(); dbs_data->tuners = tuners; return 0; From 702c9e542a25cf95683c08c56e711eddb80020ac Mon Sep 17 00:00:00 2001 From: "Rafael 
J. Wysocki" Date: Thu, 18 Feb 2016 02:21:21 +0100 Subject: [PATCH 70/94] cpufreq: governor: Add a ->start callback for governors To avoid having to check the governor type explicitly in the common code in order to initialize data structures specific to the governor type properly, add a ->start callback to struct dbs_governor and use it to initialize those data structures for the ondemand and conservative governors. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 9 +++++++++ drivers/cpufreq/cpufreq_governor.c | 16 ++-------------- drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 10 ++++++++++ 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 4a6f8e1ed72e..c11fe95152de 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -279,6 +279,14 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) kfree(dbs_data->tuners); } +static void cs_start(struct cpufreq_policy *policy) +{ + struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); + + dbs_info->down_skip = 0; + dbs_info->requested_freq = policy->cur; +} + define_get_cpu_dbs_routines(cs_cpu_dbs_info); static struct dbs_governor cs_dbs_gov = { @@ -295,6 +303,7 @@ static struct dbs_governor cs_dbs_gov = { .gov_dbs_timer = cs_dbs_timer, .init = cs_init, .exit = cs_exit, + .start = cs_start, }; #define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 60268160e0ad..badbd467e5e2 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -517,7 +517,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = 
policy_dbs->dbs_data; - unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; + unsigned int sampling_rate, ignore_nice, j; unsigned int io_busy; if (!policy->cur) @@ -543,19 +543,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } - if (gov->governor == GOV_CONSERVATIVE) { - struct cs_cpu_dbs_info_s *cs_dbs_info = - gov->get_cpu_dbs_info_s(cpu); - - cs_dbs_info->down_skip = 0; - cs_dbs_info->requested_freq = policy->cur; - } else { - struct od_ops *od_ops = gov->gov_ops; - struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu); - - od_dbs_info->sample_type = OD_NORMAL_SAMPLE; - od_ops->powersave_bias_init_cpu(cpu); - } + gov->start(policy); gov_set_update_util(policy_dbs, sampling_rate); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 7b3639328066..2ae0ad50ca3d 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -205,6 +205,7 @@ struct dbs_governor { unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); + void (*start)(struct cpufreq_policy *policy); /* Governor specific ops, see below */ void *gov_ops; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 330b5884b99b..de069f80b619 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -410,6 +410,15 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) kfree(dbs_data->tuners); } +static void od_start(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + dbs_info->sample_type = OD_NORMAL_SAMPLE; + od_ops.powersave_bias_init_cpu(cpu); +} + define_get_cpu_dbs_routines(od_cpu_dbs_info); static struct od_ops od_ops = { @@ -432,6 +441,7 @@ 
static struct dbs_governor od_dbs_gov = { .gov_ops = &od_ops, .init = od_init, .exit = od_exit, + .start = od_start, }; #define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) From 8434dadbb457813a127f56d9f0fb7d22035027b9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:22:42 +0100 Subject: [PATCH 71/94] cpufreq: governor: Drop unused governor callback and data fields After some previous changes, the ->get_cpu_dbs_info_s governor callback and the "governor" field in struct dbs_governor (whose value represents the governor type) are not used any more, so drop them. Also drop the unused gov_ops field from struct dbs_governor. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 2 -- drivers/cpufreq/cpufreq_governor.h | 15 +-------------- drivers/cpufreq/cpufreq_ondemand.c | 3 --- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c11fe95152de..cdc753139861 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -296,10 +296,8 @@ static struct dbs_governor cs_dbs_gov = { .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, - .governor = GOV_CONSERVATIVE, .kobj_type = { .default_attrs = cs_attributes }, .get_cpu_cdbs = get_cpu_cdbs, - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = cs_dbs_timer, .init = cs_init, .exit = cs_exit, diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 2ae0ad50ca3d..ee46f34f04d7 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -46,11 +46,6 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ { \ return &per_cpu(_dbs_info, cpu).cdbs; \ -} \ - \ -static void *get_cpu_dbs_info_s(int cpu) \ -{ \ - return &per_cpu(_dbs_info, cpu); \ } /* @@ -188,10 +183,6 @@ 
struct cs_dbs_tuners { /* Common Governor data across policies */ struct dbs_governor { struct cpufreq_governor gov; - - #define GOV_ONDEMAND 0 - #define GOV_CONSERVATIVE 1 - int governor; struct kobj_type kobj_type; /* @@ -201,14 +192,10 @@ struct dbs_governor { struct dbs_data *gdbs_data; struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); - void *(*get_cpu_dbs_info_s)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); void (*start)(struct cpufreq_policy *policy); - - /* Governor specific ops, see below */ - void *gov_ops; }; static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) @@ -216,7 +203,7 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy return container_of(policy->governor, struct dbs_governor, gov); } -/* Governor specific ops, will be passed to dbs_data->gov_ops */ +/* Governor specific operations */ struct od_ops { void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index de069f80b619..41d239c8dbf6 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -433,12 +433,9 @@ static struct dbs_governor od_dbs_gov = { .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }, - .governor = GOV_ONDEMAND, .kobj_type = { .default_attrs = od_attributes }, .get_cpu_cdbs = get_cpu_cdbs, - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, .gov_dbs_timer = od_dbs_timer, - .gov_ops = &od_ops, .init = od_init, .exit = od_exit, .start = od_start, From 76c5f66aa10720a377dfe8beebd39a0b2a938965 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 02:24:32 +0100 Subject: [PATCH 72/94] cpufreq: ondemand: Drop one more callback from struct od_ops The ->powersave_bias_init_cpu callback in struct od_ops is only used in one place and that invocation may be replaced with a direct call to the function pointed to by that callback, so change the code accordingly and drop the callback. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.h | 1 - drivers/cpufreq/cpufreq_ondemand.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ee46f34f04d7..ec98065dc30d 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -205,7 +205,6 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy /* Governor specific operations */ struct od_ops { - void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation); }; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 41d239c8dbf6..393fcf13a2b6 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -416,13 +416,12 @@ static void od_start(struct cpufreq_policy *policy) struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); dbs_info->sample_type = OD_NORMAL_SAMPLE; - od_ops.powersave_bias_init_cpu(cpu); + ondemand_powersave_bias_init_cpu(cpu); } define_get_cpu_dbs_routines(od_cpu_dbs_info); static struct od_ops od_ops = { - .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, .powersave_bias_target = generic_powersave_bias_target, }; From a33cce1c6cc3268d8b4843bf1e4ac1e70b27d107 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 02:26:55 +0100 Subject: [PATCH 73/94] cpufreq: governor: Fix CPU load information updates via ->store The ->store() callbacks of some tunable sysfs attributes of the ondemand and conservative governors trigger immediate updates of the CPU load information for all CPUs "governed" by the given dbs_data by walking the cpu_dbs_info structures for all online CPUs in the system and updating them. This is questionable for two reasons. First, it may lead to a lot of extra overhead on a system with many CPUs if the given dbs_data is only associated with a few of them. Second, if governor tunables are per-policy, the CPUs associated with the other sets of governor tunables should not be updated. To address this issue, use the observation that in all of the places in question the update operation may be carried out in the same way (because all of the tunables involved are now located in struct dbs_data and readily available to the common code) and make the code in those places invoke the same (new) helper function that will carry out the update correctly. That new function always checks the ignore_nice_load tunable value and updates the CPUs' prev_cpu_nice data fields if that's set, which wasn't done by the original code in store_io_is_busy(), but it should have been done in there too. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 15 +++++-------- drivers/cpufreq/cpufreq_governor.c | 30 ++++++++++++++++++++++++++ drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 22 ++++--------------- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index cdc753139861..876984c842b1 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -23,6 +23,8 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); +static struct dbs_governor cs_dbs_gov; + static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -164,7 +166,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - unsigned int input, j; + unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -180,15 +182,8 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct cs_cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(cs_cpu_dbs_info, j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, 0); - if (dbs_data->ignore_nice_load) - dbs_info->cdbs.prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - } + gov_update_cpu_data(&cs_dbs_gov, dbs_data); + return count; } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index badbd467e5e2..4b14f04daa41 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -80,6 +80,36 @@ ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, } EXPORT_SYMBOL_GPL(store_sampling_rate); +/** + * gov_update_cpu_data - Update CPU load data. 
+ * @gov: Governor whose data is to be updated. + * @dbs_data: Top-level governor data pointer. + * + * Update CPU load data for all CPUs in the domain governed by @dbs_data + * (that may be a single policy or a bunch of them if governor tunables are + * system-wide). + * + * Call under the @dbs_data mutex. + */ +void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data) +{ + struct policy_dbs_info *policy_dbs; + + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + unsigned int j; + + for_each_cpu(j, policy_dbs->policy->cpus) { + struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, + dbs_data->io_is_busy); + if (dbs_data->ignore_nice_load) + j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + } +} +EXPORT_SYMBOL_GPL(gov_update_cpu_data); + static inline struct dbs_data *to_dbs_data(struct kobject *kobj) { return container_of(kobj, struct dbs_data, kobj); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ec98065dc30d..5c7d1ea96fff 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -218,4 +218,5 @@ void od_register_powersave_bias_handler(unsigned int (*f) void od_unregister_powersave_bias_handler(void); ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, size_t count); +void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 393fcf13a2b6..216ea442b835 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -29,6 +29,7 @@ static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); +static struct dbs_governor od_dbs_gov; static struct od_ops od_ops; static unsigned int default_powersave_bias; @@ -222,7 +223,6 @@ static ssize_t store_io_is_busy(struct dbs_data 
*dbs_data, const char *buf, { unsigned int input; int ret; - unsigned int j; ret = sscanf(buf, "%u", &input); if (ret != 1) @@ -230,12 +230,8 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, dbs_data->io_is_busy = !!input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); - } + gov_update_cpu_data(&od_dbs_gov, dbs_data); + return count; } @@ -288,8 +284,6 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, unsigned int input; int ret; - unsigned int j; - ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; @@ -303,16 +297,8 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(od_cpu_dbs_info, j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, dbs_data->io_is_busy); - if (dbs_data->ignore_nice_load) - dbs_info->cdbs.prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + gov_update_cpu_data(&od_dbs_gov, dbs_data); - } return count; } From d1db75fffc22504c586c3fae8d602384ea899340 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:28:24 +0100 Subject: [PATCH 74/94] cpufreq: ondemand: Rework the handling of powersave bias updates The ondemand_powersave_bias_init() function used for resetting data fields related to the powersave bias tunable of the ondemand governor works by walking all of the online CPUs in the system and updating the od_cpu_dbs_info_s structures for all of them. However, if governor tunables are per policy, the update should not touch the CPUs that are not associated with the given dbs_data. 
Moreover, since the data fields in question are only ever used for policy->cpu in each policy governed by ondemand, the update can be limited to those specific CPUs. Rework the code to take the above observations into account. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_ondemand.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 216ea442b835..43d89f6af206 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -34,14 +34,6 @@ static struct od_ops od_ops; static unsigned int default_powersave_bias; -static void ondemand_powersave_bias_init_cpu(int cpu) -{ - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - - dbs_info->freq_table = cpufreq_frequency_get_table(cpu); - dbs_info->freq_lo = 0; -} - /* * Not all CPUs want IO time to be accounted as busy; this depends on how * efficient idling at a higher frequency/voltage is. 
@@ -120,12 +112,13 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_hi; } -static void ondemand_powersave_bias_init(void) +static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { - int i; - for_each_online_cpu(i) { - ondemand_powersave_bias_init_cpu(i); - } + unsigned int cpu = policy->cpu; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; } static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) @@ -306,6 +299,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, size_t count) { struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct policy_dbs_info *policy_dbs; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -317,7 +311,10 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, input = 1000; od_tuners->powersave_bias = input; - ondemand_powersave_bias_init(); + + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) + ondemand_powersave_bias_init(policy_dbs->policy); + return count; } @@ -398,11 +395,10 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) static void od_start(struct cpufreq_policy *policy) { - unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); dbs_info->sample_type = OD_NORMAL_SAMPLE; - ondemand_powersave_bias_init_cpu(cpu); + ondemand_powersave_bias_init(policy); } define_get_cpu_dbs_routines(od_cpu_dbs_info); From 7d5a9956af4ccf7d5cc0cd1f8d27d1691321bfc6 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 18:40:14 +0100 Subject: [PATCH 75/94] cpufreq: governor: Make governor private data per-policy Some fields in struct od_cpu_dbs_info_s and struct cs_cpu_dbs_info_s are only used for a limited set of CPUs. Namely, if a policy is shared between multiple CPUs, those fields will only be used for one of them (policy->cpu). This means that they really are per-policy rather than per-CPU and holding room for them in per-CPU data structures is generally wasteful. Also moving those fields into per-policy data structures will allow some significant simplifications to be made going forward. For this reason, introduce struct cs_policy_dbs_info and struct od_policy_dbs_info to hold those fields. Define each of the new structures as an extension of struct policy_dbs_info (such that struct policy_dbs_info is embedded in each of them) and introduce new ->alloc and ->free governor callbacks to allocate and free those structures, respectively, such that ->alloc() will return a pointer to the struct policy_dbs_info embedded in the allocated data structure and ->free() will take that pointer as its argument. With that, modify the code accessing the data fields in question in per-CPU data objects to look for them in the new structures via the struct policy_dbs_info pointer available to it and drop them from struct od_cpu_dbs_info_s and struct cs_cpu_dbs_info_s. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/amd_freq_sensitivity.c | 5 ++-- drivers/cpufreq/cpufreq_conservative.c | 34 +++++++++++++++++++++++--- drivers/cpufreq/cpufreq_governor.c | 7 +++--- drivers/cpufreq/cpufreq_governor.h | 9 ++----- drivers/cpufreq/cpufreq_ondemand.c | 34 ++++++++++++++++++-------- drivers/cpufreq/cpufreq_ondemand.h | 26 ++++++++++++++++++++ 6 files changed, 87 insertions(+), 28 deletions(-) create mode 100644 drivers/cpufreq/cpufreq_ondemand.h diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 82ae1002def1..404360cad25c 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -21,7 +21,7 @@ #include #include -#include "cpufreq_governor.h" +#include "cpufreq_ondemand.h" #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL 0xc0010080 #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE 0xc0010081 @@ -48,8 +48,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; - struct od_cpu_dbs_info_s *od_info = - dbs_governor_of(policy)->get_cpu_dbs_info_s(policy->cpu); + struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs); if (!od_info->freq_table) return freq_next; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 876984c842b1..ffffda2dcbfc 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -14,6 +14,17 @@ #include #include "cpufreq_governor.h" +struct cs_policy_dbs_info { + struct policy_dbs_info policy_dbs; + unsigned int down_skip; + unsigned int requested_freq; +}; + +static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) +{ + return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs); +} + /* Conservative governor macros */ #define 
DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) @@ -48,8 +59,8 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, */ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int load = dbs_update(policy); @@ -238,6 +249,19 @@ static struct attribute *cs_attributes[] = { /************************** sysfs end ************************/ +static struct policy_dbs_info *cs_alloc(void) +{ + struct cs_policy_dbs_info *dbs_info; + + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); + return dbs_info ? &dbs_info->policy_dbs : NULL; +} + +static void cs_free(struct policy_dbs_info *policy_dbs) +{ + kfree(to_dbs_info(policy_dbs)); +} + static int cs_init(struct dbs_data *dbs_data, bool notify) { struct cs_dbs_tuners *tuners; @@ -276,7 +300,7 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) static void cs_start(struct cpufreq_policy *policy) { - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, policy->cpu); + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); dbs_info->down_skip = 0; dbs_info->requested_freq = policy->cur; @@ -294,6 +318,8 @@ static struct dbs_governor cs_dbs_gov = { .kobj_type = { .default_attrs = cs_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = cs_dbs_timer, + .alloc = cs_alloc, + .free = cs_free, .init = cs_init, .exit = cs_exit, .start = cs_start, @@ -305,9 +331,8 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - struct cs_cpu_dbs_info_s *dbs_info = - &per_cpu(cs_cpu_dbs_info, freq->cpu); struct cpufreq_policy *policy = 
cpufreq_cpu_get_raw(freq->cpu); + struct cs_policy_dbs_info *dbs_info; if (!policy) return 0; @@ -316,6 +341,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) return 0; + dbs_info = to_dbs_info(policy->governor_data); /* * we only care if our internally tracked freq moves outside the 'valid' * ranges of frequency available to us otherwise we do not change it diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 4b14f04daa41..6cbc846e3981 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -385,8 +385,8 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli struct policy_dbs_info *policy_dbs; int j; - /* Allocate memory for the common information for policy->cpus */ - policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL); + /* Allocate memory for per-policy governor data. */ + policy_dbs = gov->alloc(); if (!policy_dbs) return NULL; @@ -421,7 +421,7 @@ static void free_policy_dbs_info(struct cpufreq_policy *policy, j_cdbs->policy_dbs = NULL; j_cdbs->update_util.func = NULL; } - kfree(policy_dbs); + gov->free(policy_dbs); } static int cpufreq_governor_init(struct cpufreq_policy *policy) @@ -582,7 +582,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) static int cpufreq_governor_stop(struct cpufreq_policy *policy) { gov_cancel_work(policy); - return 0; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 5c7d1ea96fff..354e0d306ff5 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -157,17 +157,10 @@ struct cpu_dbs_info { struct od_cpu_dbs_info_s { struct cpu_dbs_info cdbs; - struct cpufreq_frequency_table *freq_table; - unsigned int freq_lo; - unsigned int freq_lo_delay_us; - unsigned int freq_hi_delay_us; - unsigned int sample_type:1; }; struct cs_cpu_dbs_info_s { struct 
cpu_dbs_info cdbs; - unsigned int down_skip; - unsigned int requested_freq; }; /* Per policy Governors sysfs tunables */ @@ -193,6 +186,8 @@ struct dbs_governor { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); + struct policy_dbs_info *(*alloc)(void); + void (*free)(struct policy_dbs_info *policy_dbs); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); void (*start)(struct cpufreq_policy *policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 43d89f6af206..cdf431696c40 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -16,7 +16,8 @@ #include #include #include -#include "cpufreq_governor.h" + +#include "cpufreq_ondemand.h" /* On-demand governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) @@ -69,9 +70,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_hi, freq_lo; unsigned int index = 0; unsigned int delay_hi_us; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - policy->cpu); struct policy_dbs_info *policy_dbs = policy->governor_data; + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -114,10 +114,9 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { - unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); - dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); dbs_info->freq_lo = 0; } @@ -144,8 +143,8 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int 
freq) */ static void od_update(struct cpufreq_policy *policy) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); - struct policy_dbs_info *policy_dbs = dbs_info->cdbs.policy_dbs; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int load = dbs_update(policy); @@ -182,7 +181,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); int sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ @@ -347,6 +346,19 @@ static struct attribute *od_attributes[] = { /************************** sysfs end ************************/ +static struct policy_dbs_info *od_alloc(void) +{ + struct od_policy_dbs_info *dbs_info; + + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); + return dbs_info ? 
&dbs_info->policy_dbs : NULL; +} + +static void od_free(struct policy_dbs_info *policy_dbs) +{ + kfree(to_dbs_info(policy_dbs)); +} + static int od_init(struct dbs_data *dbs_data, bool notify) { struct od_dbs_tuners *tuners; @@ -395,7 +407,7 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) static void od_start(struct cpufreq_policy *policy) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); dbs_info->sample_type = OD_NORMAL_SAMPLE; ondemand_powersave_bias_init(policy); @@ -417,6 +429,8 @@ static struct dbs_governor od_dbs_gov = { .kobj_type = { .default_attrs = od_attributes }, .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = od_dbs_timer, + .alloc = od_alloc, + .free = od_free, .init = od_init, .exit = od_exit, .start = od_start, diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h new file mode 100644 index 000000000000..22403e4e0cb0 --- /dev/null +++ b/drivers/cpufreq/cpufreq_ondemand.h @@ -0,0 +1,26 @@ +/* + * Header file for CPUFreq ondemand governor and related code. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "cpufreq_governor.h" + +struct od_policy_dbs_info { + struct policy_dbs_info policy_dbs; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_delay_us; + unsigned int freq_hi_delay_us; + unsigned int sample_type:1; +}; + +static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) +{ + return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs); +} From 8c8f77fd0719a079450f59debed4f69ede825adb Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 21 Feb 2016 00:51:27 +0100 Subject: [PATCH 76/94] cpufreq: governor: Move per-CPU data to the common code After previous changes there is only one piece of code in the ondemand governor making references to per-CPU data structures, but it can be easily modified to avoid doing that, so modify it accordingly and move the definition of per-CPU data used by the ondemand and conservative governors to the common code. Next, change that code to access the per-CPU data structures directly rather than via a governor callback. This causes the ->get_cpu_cdbs governor callback to become unnecessary, so drop it along with the macro and function definitions related to it. Finally, drop the definitions of struct od_cpu_dbs_info_s and struct cs_cpu_dbs_info_s that aren't necessary any more. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 9 +------- drivers/cpufreq/cpufreq_governor.c | 31 +++++++++++--------------- drivers/cpufreq/cpufreq_governor.h | 18 +-------------- drivers/cpufreq/cpufreq_ondemand.c | 26 +++++++++------------ 4 files changed, 25 insertions(+), 59 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index ffffda2dcbfc..5d1edc55aa63 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -32,10 +32,6 @@ static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *pol #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) -static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); - -static struct dbs_governor cs_dbs_gov; - static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -193,7 +189,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - gov_update_cpu_data(&cs_dbs_gov, dbs_data); + 
gov_update_cpu_data(dbs_data); return count; } @@ -306,8 +302,6 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } -define_get_cpu_dbs_routines(cs_cpu_dbs_info); - static struct dbs_governor cs_dbs_gov = { .gov = { .name = "conservative", @@ -316,7 +310,6 @@ static struct dbs_governor cs_dbs_gov = { .owner = THIS_MODULE, }, .kobj_type = { .default_attrs = cs_attributes }, - .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = cs_dbs_timer, .alloc = cs_alloc, .free = cs_free, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6cbc846e3981..75217b850d7b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,6 +22,8 @@ #include "cpufreq_governor.h" +static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); + DEFINE_MUTEX(dbs_data_mutex); EXPORT_SYMBOL_GPL(dbs_data_mutex); @@ -82,7 +84,6 @@ EXPORT_SYMBOL_GPL(store_sampling_rate); /** * gov_update_cpu_data - Update CPU load data. - * @gov: Governor whose data is to be updated. * @dbs_data: Top-level governor data pointer. * * Update CPU load data for all CPUs in the domain governed by @dbs_data @@ -91,7 +92,7 @@ EXPORT_SYMBOL_GPL(store_sampling_rate); * * Call under the @dbs_data mutex. 
*/ -void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data) +void gov_update_cpu_data(struct dbs_data *dbs_data) { struct policy_dbs_info *policy_dbs; @@ -99,7 +100,7 @@ void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data) unsigned int j; for_each_cpu(j, policy_dbs->policy->cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, dbs_data->io_is_busy); @@ -164,7 +165,6 @@ static const struct sysfs_ops governor_sysfs_ops = { unsigned int dbs_update(struct cpufreq_policy *policy) { - struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; unsigned int ignore_nice = dbs_data->ignore_nice_load; @@ -187,13 +187,11 @@ unsigned int dbs_update(struct cpufreq_policy *policy) /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs; + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; - j_cdbs = gov->get_cpu_cdbs(j); - cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; @@ -268,14 +266,13 @@ void gov_set_update_util(struct policy_dbs_info *policy_dbs, unsigned int delay_us) { struct cpufreq_policy *policy = policy_dbs->policy; - struct dbs_governor *gov = dbs_governor_of(policy); int cpu; gov_update_sample_delay(policy_dbs, delay_us); policy_dbs->last_sample_time = 0; for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); cpufreq_set_update_util_data(cpu, &cdbs->update_util); } @@ -398,7 +395,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli /* Set policy_dbs for all CPUs, online+offline */ 
for_each_cpu(j, policy->related_cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->policy_dbs = policy_dbs; j_cdbs->update_util.func = dbs_update_util_handler; @@ -406,17 +403,15 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli return policy_dbs; } -static void free_policy_dbs_info(struct cpufreq_policy *policy, +static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, struct dbs_governor *gov) { - struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu); - struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; int j; mutex_destroy(&policy_dbs->timer_mutex); - for_each_cpu(j, policy->related_cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + for_each_cpu(j, policy_dbs->policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->policy_dbs = NULL; j_cdbs->update_util.func = NULL; @@ -507,7 +502,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) kfree(dbs_data); free_policy_dbs_info: - free_policy_dbs_info(policy, gov); + free_policy_dbs_info(policy_dbs, gov); return ret; } @@ -538,7 +533,7 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; } - free_policy_dbs_info(policy, gov); + free_policy_dbs_info(policy_dbs, gov); return 0; } @@ -561,7 +556,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) io_busy = dbs_data->io_is_busy; for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); unsigned int prev_load; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 354e0d306ff5..58749da97099 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -41,13 +41,6 @@ /* Ondemand Sampling types */ enum 
{OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; -/* create helper routines */ -#define define_get_cpu_dbs_routines(_dbs_info) \ -static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ -{ \ - return &per_cpu(_dbs_info, cpu).cdbs; \ -} - /* * Abbreviations: * dbs: used as a shortform for demand based switching It helps to keep variable @@ -155,14 +148,6 @@ struct cpu_dbs_info { struct policy_dbs_info *policy_dbs; }; -struct od_cpu_dbs_info_s { - struct cpu_dbs_info cdbs; -}; - -struct cs_cpu_dbs_info_s { - struct cpu_dbs_info cdbs; -}; - /* Per policy Governors sysfs tunables */ struct od_dbs_tuners { unsigned int powersave_bias; @@ -184,7 +169,6 @@ struct dbs_governor { */ struct dbs_data *gdbs_data; - struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); struct policy_dbs_info *(*alloc)(void); void (*free)(struct policy_dbs_info *policy_dbs); @@ -213,5 +197,5 @@ void od_register_powersave_bias_handler(unsigned int (*f) void od_unregister_powersave_bias_handler(void); ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, size_t count); -void gov_update_cpu_data(struct dbs_governor *gov, struct dbs_data *dbs_data); +void gov_update_cpu_data(struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index cdf431696c40..acd80272ded6 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,9 +28,6 @@ #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); - -static struct dbs_governor od_dbs_gov; static struct od_ops od_ops; static unsigned int default_powersave_bias; @@ -222,7 +219,7 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, dbs_data->io_is_busy = !!input; /* we need to re-evaluate prev_cpu_idle */ - gov_update_cpu_data(&od_dbs_gov, dbs_data); + 
gov_update_cpu_data(dbs_data); return count; } @@ -289,7 +286,7 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - gov_update_cpu_data(&od_dbs_gov, dbs_data); + gov_update_cpu_data(dbs_data); return count; } @@ -413,8 +410,6 @@ static void od_start(struct cpufreq_policy *policy) ondemand_powersave_bias_init(policy); } -define_get_cpu_dbs_routines(od_cpu_dbs_info); - static struct od_ops od_ops = { .powersave_bias_target = generic_powersave_bias_target, }; @@ -427,7 +422,6 @@ static struct dbs_governor od_dbs_gov = { .owner = THIS_MODULE, }, .kobj_type = { .default_attrs = od_attributes }, - .get_cpu_cdbs = get_cpu_cdbs, .gov_dbs_timer = od_dbs_timer, .alloc = od_alloc, .free = od_free, @@ -440,9 +434,6 @@ static struct dbs_governor od_dbs_gov = { static void od_set_powersave_bias(unsigned int powersave_bias) { - struct cpufreq_policy *policy; - struct dbs_data *dbs_data; - struct od_dbs_tuners *od_tuners; unsigned int cpu; cpumask_t done; @@ -451,21 +442,24 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { + struct cpufreq_policy *policy; struct policy_dbs_info *policy_dbs; + struct dbs_data *dbs_data; + struct od_dbs_tuners *od_tuners; if (cpumask_test_cpu(cpu, &done)) continue; - policy_dbs = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy_dbs; + policy = cpufreq_cpu_get_raw(cpu); + if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) + continue; + + policy_dbs = policy->governor_data; if (!policy_dbs) continue; - policy = policy_dbs->policy; cpumask_or(&done, &done, policy->cpus); - if (policy->governor != CPU_FREQ_GOV_ONDEMAND) - continue; - dbs_data = policy_dbs->dbs_data; od_tuners = dbs_data->tuners; od_tuners->powersave_bias = default_powersave_bias; From 47ebaac1f32dc606262be48a72f9cea6af376414 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 18 Feb 2016 18:41:36 +0100 Subject: [PATCH 77/94] cpufreq: governor: Relocate definitions of tuners structures Move the definitions of struct od_dbs_tuners and struct cs_dbs_tuners from the common governor header to the ondemand and conservative governor code, respectively, as they don't need to be in the common header any more. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_conservative.c | 5 +++++ drivers/cpufreq/cpufreq_governor.h | 10 ---------- drivers/cpufreq/cpufreq_ondemand.h | 4 ++++ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 5d1edc55aa63..bf4913f6453b 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -25,6 +25,11 @@ static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *pol return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs); } +struct cs_dbs_tuners { + unsigned int down_threshold; + unsigned int freq_step; +}; + /* Conservative governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 58749da97099..ece70ab6bbfc 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -148,16 +148,6 @@ struct cpu_dbs_info { struct policy_dbs_info *policy_dbs; }; -/* Per policy Governors sysfs tunables */ -struct od_dbs_tuners { - unsigned int powersave_bias; -}; - -struct cs_dbs_tuners { - unsigned int down_threshold; - unsigned int freq_step; -}; - /* Common Governor data across policies */ struct dbs_governor { struct cpufreq_governor gov; diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h index 22403e4e0cb0..f0121db3cd9e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.h +++ 
b/drivers/cpufreq/cpufreq_ondemand.h @@ -24,3 +24,7 @@ static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *pol { return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs); } + +struct od_dbs_tuners { + unsigned int powersave_bias; +}; From e3f5ed9393042188a1716d3873415ef44161addf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2016 02:33:43 +0100 Subject: [PATCH 78/94] cpufreq: governor: Make dbs_data_mutex static That mutex is only used by cpufreq_governor_dbs() and it doesn't need to be exported to modules, so make it static and drop the export incantation. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 3 +-- drivers/cpufreq/cpufreq_governor.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 75217b850d7b..4f0bd482b59e 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -24,8 +24,7 @@ static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); -DEFINE_MUTEX(dbs_data_mutex); -EXPORT_SYMBOL_GPL(dbs_data_mutex); +static DEFINE_MUTEX(dbs_data_mutex); /* Common sysfs tunables */ /** diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ece70ab6bbfc..61ff82fe0613 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -178,7 +178,6 @@ struct od_ops { unsigned int freq_next, unsigned int relation); }; -extern struct mutex dbs_data_mutex; unsigned int dbs_update(struct cpufreq_policy *policy); int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) From 1112e9d83e5cd153b35dfbb52721f8b3d3163016 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 21 Feb 2016 00:53:06 +0100 Subject: [PATCH 79/94] cpufreq: governor: Narrow down the dbs_data_mutex coverage Since cpufreq_governor_dbs() is now always called with policy->rwsem held, it cannot be executed twice in parallel for the same policy. Thus it is not necessary to hold dbs_data_mutex around the invocations of cpufreq_governor_start/stop/limits() from it as those functions never modify any data that can be shared between different policies. However, cpufreq_governor_dbs() may be executed twice in parallel for different policies using the same gov->gdbs_data object and dbs_data_mutex is still necessary to protect that object against concurrent updates. For this reason, narrow down the dbs_data_mutex locking to cpufreq_governor_init/exit() where it is needed and rename the mutex to gov_dbs_data_mutex to reflect its purpose. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 46 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 4f0bd482b59e..542c9caf8815 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -24,7 +24,7 @@ static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); -static DEFINE_MUTEX(dbs_data_mutex); +static DEFINE_MUTEX(gov_dbs_data_mutex); /* Common sysfs tunables */ /** @@ -421,10 +421,10 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, static int cpufreq_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); - struct dbs_data *dbs_data = gov->gdbs_data; + struct dbs_data *dbs_data; struct policy_dbs_info *policy_dbs; unsigned int latency; - int ret; + int ret = 0; /* State should be equivalent to EXIT */ if (policy->governor_data) @@ -434,6 +434,10 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!policy_dbs) return -ENOMEM; + /* Protect
gov->gdbs_data against concurrent updates. */ + mutex_lock(&gov_dbs_data_mutex); + + dbs_data = gov->gdbs_data; if (dbs_data) { if (WARN_ON(have_governor_per_policy())) { ret = -EINVAL; @@ -446,8 +450,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) dbs_data->usage_count++; list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); mutex_unlock(&dbs_data->mutex); - - return 0; + goto out; } dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); @@ -488,7 +491,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) get_governor_parent_kobj(policy), "%s", gov->gov.name); if (!ret) - return 0; + goto out; /* Failure, so roll back. */ pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); @@ -502,6 +505,9 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) free_policy_dbs_info: free_policy_dbs_info(policy_dbs, gov); + +out: + mutex_unlock(&gov_dbs_data_mutex); return ret; } @@ -512,6 +518,9 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_data *dbs_data = policy_dbs->dbs_data; int count; + /* Protect gov->gdbs_data against concurrent updates. 
*/ + mutex_lock(&gov_dbs_data_mutex); + mutex_lock(&dbs_data->mutex); list_del(&policy_dbs->list); count = --dbs_data->usage_count; @@ -533,6 +542,8 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) } free_policy_dbs_info(policy_dbs, gov); + + mutex_unlock(&gov_dbs_data_mutex); return 0; } @@ -599,31 +610,20 @@ static int cpufreq_governor_limits(struct cpufreq_policy *policy) int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - int ret = -EINVAL; - - /* Lock governor to block concurrent initialization of governor */ - mutex_lock(&dbs_data_mutex); - if (event == CPUFREQ_GOV_POLICY_INIT) { - ret = cpufreq_governor_init(policy); + return cpufreq_governor_init(policy); } else if (policy->governor_data) { switch (event) { case CPUFREQ_GOV_POLICY_EXIT: - ret = cpufreq_governor_exit(policy); - break; + return cpufreq_governor_exit(policy); case CPUFREQ_GOV_START: - ret = cpufreq_governor_start(policy); - break; + return cpufreq_governor_start(policy); case CPUFREQ_GOV_STOP: - ret = cpufreq_governor_stop(policy); - break; + return cpufreq_governor_stop(policy); case CPUFREQ_GOV_LIMITS: - ret = cpufreq_governor_limits(policy); - break; + return cpufreq_governor_limits(policy); } } - - mutex_unlock(&dbs_data_mutex); - return ret; + return -EINVAL; } EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); From 94ab5e030fe10cfcc700050cc21535b824943077 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 21 Feb 2016 03:15:34 +0100 Subject: [PATCH 80/94] cpufreq: governor: Make gov_set_update_util() static The gov_set_update_util() routine is only used internally by the common governor code and it doesn't need to be exported, so make it static. No functional changes. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 542c9caf8815..c9a571fd79ac 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -261,8 +261,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(dbs_update); -void gov_set_update_util(struct policy_dbs_info *policy_dbs, - unsigned int delay_us) +static void gov_set_update_util(struct policy_dbs_info *policy_dbs, + unsigned int delay_us) { struct cpufreq_policy *policy = policy_dbs->policy; int cpu; @@ -276,7 +276,6 @@ void gov_set_update_util(struct policy_dbs_info *policy_dbs, cpufreq_set_update_util_data(cpu, &cdbs->update_util); } } -EXPORT_SYMBOL_GPL(gov_set_update_util); static inline void gov_clear_update_util(struct cpufreq_policy *policy) { From 27de34823984e844f5dc042d39bb43f5dc98966f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Feb 2016 14:14:34 +0100 Subject: [PATCH 81/94] cpufreq: governor: Fix race in dbs_update_util_handler() There is a scenario that may lead to undesired results in dbs_update_util_handler(). Namely, if two CPUs sharing a policy enter the function at the same time, pass the sample delay check and then one of them is stalled until dbs_work_handler() (queued up by the other CPU) clears the work counter, it may update the work counter and queue up another work item prematurely. To prevent that from happening, use the observation that the CPU queuing up a work item in dbs_update_util_handler() updates the last sample time. This means that if another CPU was stalling after passing the sample delay check and now successfully updated the work counter as a result of the race described above, it will see the new value of the last sample time which is different from what it used in the sample delay check before.
If that happens, the sample delay check passed previously is not valid any more, so the CPU should not continue. Fixes: f17cbb53783c (cpufreq: governor: Avoid atomic operations in hot paths) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index c9a571fd79ac..064582aa5a0d 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -340,7 +340,7 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; - u64 delta_ns; + u64 delta_ns, lst; /* * The work may not be allowed to be queued up right now. @@ -356,7 +356,8 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * of sample_delay_ns used in the computation may be stale. */ smp_rmb(); - delta_ns = time - policy_dbs->last_sample_time; + lst = READ_ONCE(policy_dbs->last_sample_time); + delta_ns = time - lst; if ((s64)delta_ns < policy_dbs->sample_delay_ns) return; @@ -365,9 +366,19 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, * at this point. Otherwise, we need to ensure that only one of the * CPUs sharing the policy will do that. */ - if (policy_dbs->is_shared && - !atomic_add_unless(&policy_dbs->work_count, 1, 1)) - return; + if (policy_dbs->is_shared) { + if (!atomic_add_unless(&policy_dbs->work_count, 1, 1)) + return; + + /* + * If another CPU updated last_sample_time in the meantime, we + * shouldn't be here, so clear the work counter and bail out. 
+ */ + if (unlikely(lst != READ_ONCE(policy_dbs->last_sample_time))) { + atomic_set(&policy_dbs->work_count, 0); + return; + } + } policy_dbs->last_sample_time = time; policy_dbs->work_in_progress = true; From f737236b128cac7c355d0650a98c42ae4313f3f1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 14:18:20 +0530 Subject: [PATCH 82/94] cpufreq: governor: Drop unnecessary checks from show() and store() The show() and store() routines in the cpufreq-governor core don't need to check if the struct governor_attr they want to use really provides the callbacks they need as expected (if that's not the case, it means a bug in the code anyway), so change them to avoid doing that. Also change the error value to -EBUSY, if the governor is getting removed and we aren't allowed to store any more changes. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 064582aa5a0d..70079e21fa2d 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -125,12 +125,8 @@ static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, { struct dbs_data *dbs_data = to_dbs_data(kobj); struct governor_attr *gattr = to_gov_attr(attr); - int ret = -EIO; - if (gattr->show) - ret = gattr->show(dbs_data, buf); - - return ret; + return gattr->show(dbs_data, buf); } static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, @@ -138,11 +134,11 @@ static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, { struct dbs_data *dbs_data = to_dbs_data(kobj); struct governor_attr *gattr = to_gov_attr(attr); - int ret = -EIO; + int ret = -EBUSY; mutex_lock(&dbs_data->mutex); - if (dbs_data->usage_count && gattr->store) + if (dbs_data->usage_count) ret = gattr->store(dbs_data, buf, count); 
mutex_unlock(&dbs_data->mutex); From 11eb69b984aae216ae43c79d2d43441ee68a63ca Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:42 +0530 Subject: [PATCH 83/94] cpufreq: Relocate handle_update() to kill its declaration handle_update() is declared at the top of the file as its user appears before its definition. Relocate the routine to get rid of this. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bc93272b4a12..316beffc960a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -159,7 +159,6 @@ static inline bool has_target(void) static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); -static void handle_update(struct work_struct *work); /** * Two notifier lists: the "policy" list is involved in the @@ -1072,6 +1071,15 @@ unlock: return ret; } +static void handle_update(struct work_struct *work) +{ + struct cpufreq_policy *policy = + container_of(work, struct cpufreq_policy, update); + unsigned int cpu = policy->cpu; + pr_debug("handle_update for cpu %u called\n", cpu); + cpufreq_update_policy(cpu); +} + static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); @@ -1453,15 +1461,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) cpufreq_policy_free(policy, true); } -static void handle_update(struct work_struct *work) -{ - struct cpufreq_policy *policy = - container_of(work, struct cpufreq_policy, update); - unsigned int cpu = policy->cpu; - pr_debug("handle_update for cpu %u called\n", cpu); - cpufreq_update_policy(cpu); -} - /** * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're * in deep trouble.
From a1317e091ab1386812ee8ab4e3bbd89f2811bc74 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:43 +0530 Subject: [PATCH 84/94] cpufreq: Rename __cpufreq_governor() to cpufreq_governor() The __ at the beginning of the routine aren't really necessary at all. Rename it to cpufreq_governor() instead. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 44 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 316beffc960a..b3d05a905034 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -156,8 +156,7 @@ static inline bool has_target(void) } /* internal prototypes */ -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event); +static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); /** @@ -1048,7 +1047,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp down_write(&policy->rwsem); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { pr_err("%s: Failed to stop governor\n", __func__); goto unlock; @@ -1058,9 +1057,9 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp cpumask_set_cpu(cpu, policy->cpus); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); if (ret) pr_err("%s: Failed to start governor\n", __func__); @@ -1382,7 +1381,7 @@ static void cpufreq_offline(unsigned int cpu) down_write(&policy->rwsem); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, 
CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } @@ -1403,9 +1402,9 @@ static void cpufreq_offline(unsigned int cpu) /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); if (ret) pr_err("%s: Failed to start governor\n", __func__); @@ -1419,7 +1418,7 @@ static void cpufreq_offline(unsigned int cpu) /* If cpu is last user of policy, free policy */ if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1635,7 +1634,7 @@ void cpufreq_suspend(void) for_each_active_policy(policy) { down_write(&policy->rwsem); - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); up_write(&policy->rwsem); if (ret) @@ -1678,9 +1677,9 @@ void cpufreq_resume(void) policy); } else { down_write(&policy->rwsem); - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); up_write(&policy->rwsem); if (ret) @@ -1977,8 +1976,7 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void) return NULL; } -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event) +static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { int ret; @@ -2190,7 +2188,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, old_gov = policy->governor; /* end old governor */ if (old_gov) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { /* 
This can happen due to race with other operations */ pr_debug("%s: Failed to Stop Governor: %s (%d)\n", @@ -2198,7 +2196,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2208,30 +2206,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, /* start new governor */ policy->governor = new_policy->governor; - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); if (!ret) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) goto out; - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); } /* new governor failed, so re-start old one */ pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) policy->governor = NULL; else - __cpufreq_governor(policy, CPUFREQ_GOV_START); + cpufreq_governor(policy, CPUFREQ_GOV_START); } return ret; out: pr_debug("governor: change or update limits\n"); - return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); } /** @@ -2334,7 +2332,7 @@ static int cpufreq_boost_set_sw(int state) down_write(&policy->rwsem); policy->user_policy.max = policy->max; - __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); up_write(&policy->rwsem); } } From 242aa883a64d8c54cfeee47f3603b21bc705e081 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:44 +0530 Subject: [PATCH 85/94] cpufreq: Remove 'policy->governor_enabled' The entire sequence of 
events (like INIT/START or STOP/EXIT) for which cpufreq_governor() is called, is guaranteed to be protected by policy->rwsem now. The additional checks that were added earlier (as we were forced to drop policy->rwsem before calling cpufreq_governor() for EXIT event), aren't required anymore. Over that, they weren't sufficient really. They just take care of START/STOP events, but not INIT/EXIT and the state machine was never maintained properly by them. Kill the unnecessary checks and policy->governor_enabled field. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 17 ----------------- include/linux/cpufreq.h | 1 - 2 files changed, 18 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b3d05a905034..dd568aaf2728 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2010,17 +2010,6 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); - if ((policy->governor_enabled && event == CPUFREQ_GOV_START) - || (!policy->governor_enabled - && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { - return -EBUSY; - } - - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = false; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = true; - ret = policy->governor->governor(policy, event); if (!ret) { @@ -2028,12 +2017,6 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) policy->governor->initialized++; else if (event == CPUFREQ_GOV_POLICY_EXIT) policy->governor->initialized--; - } else { - /* Restore original values */ - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = true; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = false; } if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cac3d1ba8200..a50c5b2e3bf2 100644 --- 
a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,7 +80,6 @@ struct cpufreq_policy { unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; - bool governor_enabled; /* governor start/stop flag */ char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ struct work_struct update; /* if update_policy() needs to be From e6f036571e1f65021a442ec7aad087a6a239ecfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 28 Feb 2016 02:33:29 +0100 Subject: [PATCH 86/94] cpufreq: Select IRQ_WORK if CPU_FREQ_GOV_COMMON is set Commit 0eb463be3436 (cpufreq: governor: Replace timers with utilization update callbacks) made CPU_FREQ select IRQ_WORK, but that's not necessary, as it is sufficient for IRQ_WORK to be selected by CPU_FREQ_GOV_COMMON, so modify the cpufreq Kconfig to that effect. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index dcb972a38fbc..aa403aa2b927 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -3,7 +3,6 @@ menu "CPU Frequency scaling" config CPU_FREQ bool "CPU Frequency scaling" select SRCU - select IRQ_WORK help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because @@ -20,6 +19,7 @@ config CPU_FREQ if CPU_FREQ config CPU_FREQ_GOV_COMMON + select IRQ_WORK bool config CPU_FREQ_BOOST_SW From 08f511fd41c3afe303eb9b41bff0570f7c1b6937 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 4 Mar 2016 03:58:22 +0100 Subject: [PATCH 87/94] cpufreq: Reduce cpufreq_update_util() overhead a bit Use the observation that cpufreq_update_util() is only called by the scheduler with rq->lock held, so the callers of cpufreq_set_update_util_data() can use synchronize_sched() instead of synchronize_rcu() to wait for cpufreq_update_util() to complete. Moreover, if they are updated to do that, rcu_read_(un)lock() calls in cpufreq_update_util() might be replaced with rcu_read_(un)lock_sched(), respectively, but those aren't really necessary, because the scheduler calls that function from RCU-sched read-side critical sections already. In addition to that, if cpufreq_set_update_util_data() checks the func field in the struct update_util_data before setting the per-CPU pointer to it, the data->func check may be dropped from cpufreq_update_util() as well. Make the above changes to reduce the overhead from cpufreq_update_util() in the scheduler paths invoking it and to make the cleanup after removing its callbacks less heavy-weight somewhat. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) --- drivers/cpufreq/cpufreq.c | 25 +++++++++++++++++-------- drivers/cpufreq/cpufreq_governor.c | 2 +- drivers/cpufreq/intel_pstate.c | 4 ++-- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index dd568aaf2728..6eca12ab71d7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -115,12 +115,15 @@ static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); * to call from cpufreq_update_util(). That function will be called from an RCU * read-side critical section, so it must not sleep. * - * Callers must use RCU callbacks to free any memory that might be accessed - * via the old update_util_data pointer or invoke synchronize_rcu() right after - * this function to avoid use-after-free. 
+ * Callers must use RCU-sched callbacks to free any memory that might be + * accessed via the old update_util_data pointer or invoke synchronize_sched() + * right after this function to avoid use-after-free. */ void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) { + if (WARN_ON(data && !data->func)) + return; + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); } EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); @@ -133,18 +136,24 @@ EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); * * This function is called by the scheduler on every invocation of * update_load_avg() on the CPU whose utilization is being updated. + * + * It can only be called from RCU-sched read-side critical sections. */ void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) { struct update_util_data *data; - rcu_read_lock(); +#ifdef CONFIG_LOCKDEP + WARN_ON(debug_locks && !rcu_read_lock_sched_held()); +#endif - data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data)); - if (data && data->func) + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + /* + * If this isn't inside of an RCU-sched read-side critical section, data + * may become NULL after the check below. 
+ */ + if (data) data->func(data, time, util, max); - - rcu_read_unlock(); } /* Flag to suspend/resume CPUFreq governors */ diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 70079e21fa2d..db46190bb246 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -280,7 +280,7 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) for_each_cpu(i, policy->cpus) cpufreq_set_update_util_data(i, NULL); - synchronize_rcu(); + synchronize_sched(); } static void gov_cancel_work(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f4d85c2ae7b1..2165d2b2fc35 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1168,7 +1168,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); cpufreq_set_update_util_data(cpu_num, NULL); - synchronize_rcu(); + synchronize_sched(); if (hwp_active) return; @@ -1426,7 +1426,7 @@ out: for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { cpufreq_set_update_util_data(cpu, NULL); - synchronize_rcu(); + synchronize_sched(); kfree(all_cpu_data[cpu]); } } From edd4a893e097d744e8069acf585f8b02dbbc9134 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2016 14:51:33 +0530 Subject: [PATCH 88/94] Revert "cpufreq: postfix policy directory with the first CPU in related_cpus" Revert commit 3510fac45492 (cpufreq: postfix policy directory with the first CPU in related_cpus). Earlier, the policy->kobj was added to the kobject core, before ->init() callback was called for the cpufreq drivers. Which allowed those drivers to add or remove, driver dependent, sysfs files/directories to the same kobj from their ->init() and ->exit() callbacks. That isn't possible anymore after commit 3510fac45492. Now, there is no other clean alternative that people can adopt. 
It's better to revert the earlier commit to allow cpufreq drivers to create/remove sysfs files from ->init() and ->exit() callbacks. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bdf258ea0977..abca44c2e4e3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -985,6 +985,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; + int ret; if (WARN_ON(!dev)) return NULL; @@ -1002,7 +1003,13 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) goto err_free_rcpumask; - kobject_init(&policy->kobj, &ktype_cpufreq); + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + cpufreq_global_kobject, "policy%u", cpu); + if (ret) { + pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + goto err_free_real_cpus; + } + INIT_LIST_HEAD(&policy->policy_list); init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); @@ -1013,6 +1020,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) policy->cpu = cpu; return policy; +err_free_real_cpus: + free_cpumask_var(policy->real_cpus); err_free_rcpumask: free_cpumask_var(policy->related_cpus); err_free_cpumask: @@ -1117,16 +1126,6 @@ static int cpufreq_online(unsigned int cpu) cpumask_copy(policy->related_cpus, policy->cpus); /* Remember CPUs present at the policy creation time.
*/ cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); - - /* Name and add the kobject */ - ret = kobject_add(&policy->kobj, cpufreq_global_kobject, - "policy%u", - cpumask_first(policy->related_cpus)); - if (ret) { - pr_err("%s: failed to add policy->kobj: %d\n", __func__, - ret); - goto out_exit_policy; - } } /* From adaf9fcd136970e480d7ca834c0cf25ce922ea74 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Mar 2016 20:44:47 +0100 Subject: [PATCH 89/94] cpufreq: Move scheduler-related code to the sched directory Create cpufreq.c under kernel/sched/ and move the cpufreq code related to the scheduler to that file and to sched.h. Redefine cpufreq_update_util() as a static inline function to avoid function calls at its call sites in the scheduler code (as suggested by Peter Zijlstra). Also move the definition of struct update_util_data and declaration of cpufreq_set_update_util_data() from include/linux/cpufreq.h to include/linux/sched.h. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- drivers/cpufreq/cpufreq.c | 53 ------------------------------ drivers/cpufreq/cpufreq_governor.c | 1 + include/linux/cpufreq.h | 34 ------------------- include/linux/sched.h | 9 +++++ kernel/sched/Makefile | 1 + kernel/sched/cpufreq.c | 37 +++++++++++++++++++++ kernel/sched/sched.h | 49 ++++++++++++++++++++++++++- 7 files changed, 96 insertions(+), 88 deletions(-) create mode 100644 kernel/sched/cpufreq.c diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6eca12ab71d7..58e1a39b4d22 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -103,59 +103,6 @@ static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); -static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); - -/** - * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. 
- * @cpu: The CPU to set the pointer for. - * @data: New pointer value. - * - * Set and publish the update_util_data pointer for the given CPU. That pointer - * points to a struct update_util_data object containing a callback function - * to call from cpufreq_update_util(). That function will be called from an RCU - * read-side critical section, so it must not sleep. - * - * Callers must use RCU-sched callbacks to free any memory that might be - * accessed via the old update_util_data pointer or invoke synchronize_sched() - * right after this function to avoid use-after-free. - */ -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) -{ - if (WARN_ON(data && !data->func)) - return; - - rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); -} -EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); - -/** - * cpufreq_update_util - Take a note about CPU utilization changes. - * @time: Current time. - * @util: Current utilization. - * @max: Utilization ceiling. - * - * This function is called by the scheduler on every invocation of - * update_load_avg() on the CPU whose utilization is being updated. - * - * It can only be called from RCU-sched read-side critical sections. - */ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) -{ - struct update_util_data *data; - -#ifdef CONFIG_LOCKDEP - WARN_ON(debug_locks && !rcu_read_lock_sched_held()); -#endif - - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); - /* - * If this isn't inside of an RCU-sched read-side critical section, data - * may become NULL after the check below. 
- */ - if (data) - data->func(data, time, util, max); -} - /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index db46190bb246..1c25ef405616 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "cpufreq_governor.h" diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a50c5b2e3bf2..a5ea52f793f3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -146,36 +146,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. - * - * The way cpufreq is currently arranged requires it to evaluate the CPU - * performance state (frequency/voltage) on a regular basis to prevent it from - * being stuck in a completely inadequate performance level for too long. - * That is not guaranteed to happen if the updates are only triggered from CFS, - * though, because they may not be coming in if RT or deadline tasks are active - * all the time (or there are RT and DL tasks only). - * - * As a workaround for that issue, this function is called by the RT and DL - * sched classes to trigger extra cpufreq updates to prevent it from stalling, - * but that really is a band-aid. Going forward it should be replaced with - * solutions targeted more specifically at RT and DL tasks. 
- */ -static inline void cpufreq_trigger_update(u64 time) -{ - cpufreq_update_util(time, ULONG_MAX, 0); -} - -struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); -}; - -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); - unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -187,10 +157,6 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else -static inline void cpufreq_update_util(u64 time, unsigned long util, - unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} - static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..913e755ef7b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3207,4 +3207,13 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); +#endif /* CONFIG_CPU_FREQ */ + #endif diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 67687973ce80..9507522164ac 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -19,3 +19,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o +obj-$(CONFIG_CPU_FREQ) += cpufreq.o diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c new file mode 100644 index 000000000000..928c4ba32f68 --- /dev/null +++ b/kernel/sched/cpufreq.c @@ -0,0 +1,37 @@ +/* + * Scheduler code and data 
structures related to cpufreq. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "sched.h" + +DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * @cpu: The CPU to set the pointer for. + * @data: New pointer value. + * + * Set and publish the update_util_data pointer for the given CPU. That pointer + * points to a struct update_util_data object containing a callback function + * to call from cpufreq_update_util(). That function will be called from an RCU + * read-side critical section, so it must not sleep. + * + * Callers must use RCU-sched callbacks to free any memory that might be + * accessed via the old update_util_data pointer or invoke synchronize_sched() + * right after this function to avoid use-after-free. + */ +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +{ + if (WARN_ON(data && !data->func)) + return; + + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); +} +EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f042190c8002..faf7e2758dd0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,7 +9,6 @@ #include #include #include -#include #include "cpupri.h" #include "cpudeadline.h" @@ -1739,3 +1738,51 @@ static inline u64 irq_time_read(int cpu) } #endif /* CONFIG_64BIT */ #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ + +#ifdef CONFIG_CPU_FREQ +DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_update_util - Take a note about CPU utilization changes. + * @time: Current time. + * @util: Current utilization. + * @max: Utilization ceiling. 
+ * + * This function is called by the scheduler on every invocation of + * update_load_avg() on the CPU whose utilization is being updated. + * + * It can only be called from RCU-sched read-side critical sections. + */ +static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) +{ + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, time, util, max); +} + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. + * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. + * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). + * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} +#else +static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} +#endif /* CONFIG_CPU_FREQ */ From b54a0dfd56d5c314302987e0ff173b3f1bfcb555 Mon Sep 17 00:00:00 2001 From: Philippe Longepe Date: Tue, 8 Mar 2016 10:31:14 +0100 Subject: [PATCH 90/94] intel_pstate: Remove extra conversions in pid calculation pid->setpoint and pid->deadband can be initialized in fixed point, so we can avoid the int_tofp in pid_calc. 
Signed-off-by: Philippe Longepe Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 23bb798d0cd2..864214de5cdf 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -198,8 +198,8 @@ static struct perf_limits *limits = &powersave_limits; static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { - pid->setpoint = setpoint; - pid->deadband = deadband; + pid->setpoint = int_tofp(setpoint); + pid->deadband = int_tofp(deadband); pid->integral = int_tofp(integral); pid->last_err = int_tofp(setpoint) - int_tofp(busy); } @@ -225,9 +225,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) int32_t pterm, dterm, fp_error; int32_t integral_limit; - fp_error = int_tofp(pid->setpoint) - busy; + fp_error = pid->setpoint - busy; - if (abs(fp_error) <= int_tofp(pid->deadband)) + if (abs(fp_error) <= pid->deadband) return 0; pterm = mul_fp(pid->p_gain, fp_error); From a158bed5dc92bd83338225135d448958e0b3745d Mon Sep 17 00:00:00 2001 From: Philippe Longepe Date: Sun, 6 Mar 2016 08:34:04 +0100 Subject: [PATCH 91/94] intel_pstate: Optimize calculation for max/min_perf_adj mul_fp(int_tofp(A), B) expands to: ((A << FRAC_BITS) * B) >> FRAC_BITS, so the same result can be obtained via simple multiplication A * B. Apply this observation to max_perf * limits->max_perf and max_perf * limits->min_perf in intel_pstate_get_min_max(). Signed-off-by: Philippe Longepe Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 864214de5cdf..5b5bfc1c90f1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -831,11 +831,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) * policy, or by cpu specific default values determined through * experimentation. */ - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); + max_perf_adj = fp_toint(max_perf * limits->max_perf); *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); + min_perf = fp_toint(max_perf * limits->min_perf); *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } From 7349ec0470b62820ae226e30770b9d84a53ced9d Mon Sep 17 00:00:00 2001 From: Philippe Longepe Date: Sun, 6 Mar 2016 08:34:05 +0100 Subject: [PATCH 92/94] intel_pstate: Move intel_pstate_calc_busy() into get_target_pstate_use_performance() The cpu_load algorithm doesn't need to invoke intel_pstate_calc_busy(), so move that call from intel_pstate_sample() to get_target_pstate_use_performance(). Signed-off-by: Philippe Longepe Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5b5bfc1c90f1..95cc21713bb4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -915,8 +915,6 @@ static inline void intel_pstate_sample(struct cpudata *cpu, u64 time) cpu->sample.mperf -= cpu->prev_mperf; cpu->sample.tsc -= cpu->prev_tsc; - intel_pstate_calc_busy(cpu); - cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; cpu->prev_tsc = tsc; @@ -945,7 +943,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) mperf = cpu->sample.mperf + delta_iowait_mperf; cpu->prev_cummulative_iowait = cummulative_iowait; - /* * The load can be estimated as the ratio of the mperf counter * running at a constant frequency during active periods @@ -963,6 +960,8 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) int32_t core_busy, max_pstate, current_pstate, sample_ratio; u64 duration_ns; + intel_pstate_calc_busy(cpu); + /* * core_busy is the ratio of actual performance to max * max_pstate is the max non turbo pstate available From 8fa520af50817d5f30d293f507c937f561b3e6b9 Mon Sep 17 00:00:00 2001 From: Philippe Longepe Date: Sun, 6 Mar 2016 08:34:06 +0100 Subject: [PATCH 93/94] intel_pstate: Remove freq calculation from intel_pstate_calc_busy() Use a helper function to compute the average pstate and call it only where it is needed (only when tracing or in intel_pstate_get). Signed-off-by: Philippe Longepe Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 95cc21713bb4..4acb904908d1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -881,12 +881,6 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) core_pct = int_tofp(sample->aperf) * int_tofp(100); core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); - sample->freq = fp_toint( - mul_fp(int_tofp( - cpu->pstate.max_pstate_physical * - cpu->pstate.scaling / 100), - core_pct)); - sample->core_pct_busy = (int32_t)core_pct; } @@ -920,6 +914,12 @@ static inline void intel_pstate_sample(struct cpudata *cpu, u64 time) cpu->prev_tsc = tsc; } +static inline int32_t get_avg_frequency(struct cpudata *cpu) +{ + return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf * + cpu->pstate.scaling, cpu->sample.mperf); +} + static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; @@ -1015,7 +1015,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->mperf, sample->aperf, sample->tsc, - sample->freq); + get_avg_frequency(cpu)); } static void intel_pstate_update_util(struct update_util_data *data, u64 time, @@ -1104,7 +1104,7 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) if (!cpu) return 0; sample = &cpu->sample; - return sample->freq; + return get_avg_frequency(cpu); } static int intel_pstate_set_policy(struct cpufreq_policy *policy) From 4fec7ad5f637159525265a45f66482cf8817b45f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Mar 2016 23:45:19 +0100 Subject: [PATCH 94/94] intel_pstate: Do not skip samples partially If the current value of MPERF or the current value of TSC is the same as the previous one, respectively, intel_pstate_sample() bails out early and skips the sample. 
However, intel_pstate_adjust_busy_pstate() is still called in that case which is not correct, so modify intel_pstate_sample() to return a bool value indicating whether or not the sample has been taken and use it to decide whether or not to call intel_pstate_adjust_busy_pstate(). While at it, remove redundant parentheses from the MPERF/TSC check in intel_pstate_sample(). Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4acb904908d1..cb5607495816 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -884,7 +884,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) sample->core_pct_busy = (int32_t)core_pct; } -static inline void intel_pstate_sample(struct cpudata *cpu, u64 time) +static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) { u64 aperf, mperf; unsigned long flags; @@ -894,9 +894,9 @@ static inline void intel_pstate_sample(struct cpudata *cpu, u64 time) rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); tsc = rdtsc(); - if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { + if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) { local_irq_restore(flags); - return; + return false; } local_irq_restore(flags); @@ -912,6 +912,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu, u64 time) cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; cpu->prev_tsc = tsc; + return true; } static inline int32_t get_avg_frequency(struct cpudata *cpu) @@ -1025,8 +1026,9 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, u64 delta_ns = time - cpu->sample.time; if ((s64)delta_ns >= pid_params.sample_rate_ns) { - intel_pstate_sample(cpu, time); - if (!hwp_active) + bool sample_taken = intel_pstate_sample(cpu, time); + + if (sample_taken && !hwp_active) 
intel_pstate_adjust_busy_pstate(cpu); } }