diff --git a/MAINTAINERS b/MAINTAINERS index 0d31d2b58d8c..ee6cf4d1010c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4133,7 +4133,9 @@ F: Documentation/admin-guide/pm/intel_pstate.rst F: Documentation/cpu-freq/ F: Documentation/devicetree/bindings/cpufreq/ F: drivers/cpufreq/ +F: kernel/sched/cpufreq*.c F: include/linux/cpufreq.h +F: include/linux/sched/cpufreq.h F: tools/testing/selftests/cpufreq/ CPU FREQUENCY DRIVERS - ARM BIG LITTLE diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index c93fe0f256de..ebc53804d57b 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -758,15 +758,20 @@ static int cpufreq_callback(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - int cpu = freq->cpu; + struct cpumask *cpus = freq->policy->cpus; + int cpu, first = cpumask_first(cpus); + unsigned int lpj; if (freq->flags & CPUFREQ_CONST_LOOPS) return NOTIFY_OK; - if (!per_cpu(l_p_j_ref, cpu)) { - per_cpu(l_p_j_ref, cpu) = - per_cpu(cpu_data, cpu).loops_per_jiffy; - per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!per_cpu(l_p_j_ref, first)) { + for_each_cpu(cpu, cpus) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + } + if (!global_l_p_j_ref) { global_l_p_j_ref = loops_per_jiffy; global_l_p_j_ref_freq = freq->old; @@ -778,10 +783,11 @@ static int cpufreq_callback(struct notifier_block *nb, loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, global_l_p_j_ref_freq, freq->new); - per_cpu(cpu_data, cpu).loops_per_jiffy = - cpufreq_scale(per_cpu(l_p_j_ref, cpu), - per_cpu(l_p_j_ref_freq, cpu), - freq->new); + + lpj = cpufreq_scale(per_cpu(l_p_j_ref, first), + per_cpu(l_p_j_ref_freq, first), freq->new); + for_each_cpu(cpu, cpus) + per_cpu(cpu_data, cpu).loops_per_jiffy = lpj; } return NOTIFY_OK; } diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3eb77943ce12..89fb05f90609 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -653,19 +653,23 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val void *data) { struct cpufreq_freqs *freq = data; - unsigned int cpu = freq->cpu; - struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); + unsigned int cpu; + struct freq_table *ft; - if (!ft->ref_freq) { - ft->ref_freq = freq->old; - ft->clock_tick_ref = cpu_data(cpu).clock_tick; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - cpu_data(cpu).clock_tick = - cpufreq_scale(ft->clock_tick_ref, - ft->ref_freq, - freq->new); + for_each_cpu(cpu, freq->policy->cpus) { + ft = &per_cpu(sparc64_freq_table, cpu); + + if (!ft->ref_freq) { + ft->ref_freq = freq->old; + ft->clock_tick_ref = cpu_data(cpu).clock_tick; + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + cpu_data(cpu).clock_tick = + cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, + freq->new); + } } return 0; diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index f4dd73396f28..ebb14a26f117 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -97,6 +97,7 @@ static void intel_epb_restore(void) wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); } +#ifdef CONFIG_PM static struct syscore_ops intel_epb_syscore_ops = { .suspend = intel_epb_save, .resume = intel_epb_restore, @@ -193,6 +194,25 @@ static int intel_epb_offline(unsigned int cpu) return 0; } +static inline void register_intel_ebp_syscore_ops(void) +{ + register_syscore_ops(&intel_epb_syscore_ops); +} +#else /* !CONFIG_PM */ +static int intel_epb_online(unsigned int cpu) +{ + intel_epb_restore(); + return 0; +} + +static int intel_epb_offline(unsigned int cpu) +{ + return intel_epb_save(); +} + +static inline void register_intel_ebp_syscore_ops(void) {} +#endif + static __init int intel_epb_init(void) { int ret; @@ -206,7 +226,7 @@ static __init int intel_epb_init(void) if (ret < 0) goto err_out_online; - register_syscore_ops(&intel_epb_syscore_ops); + register_intel_ebp_syscore_ops(); return 0; err_out_online: diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 15b5e98a86f9..356dfc555a27 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -979,7 +979,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); + set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); } return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d75bb97b983c..b9591abde62a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6698,10 +6698,8 @@ static void kvm_hyperv_tsc_notifier(void) } #endif -static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) +static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) { - struct cpufreq_freqs *freq = data; struct kvm *kvm; struct kvm_vcpu *vcpu; int i, send_ipi = 0; @@ -6745,17 +6743,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va * */ - if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) - return 0; - if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) - return 0; - - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu->cpu != freq->cpu) + if (vcpu->cpu != cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); if (vcpu->cpu != smp_processor_id()) @@ -6777,8 +6770,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va * guest context is entered kvmclock will be updated, * so the guest will not see stale values. */ - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); } +} + +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + + for_each_cpu(cpu, freq->policy->cpus) + __kvmclock_cpufreq_notifier(freq, cpu); + return 0; } diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 7a6aa2318915..33c30c1e6a30 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -128,6 +128,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) #define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) +#define genpd_is_rpm_always_on(genpd) (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -515,7 +516,9 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, * (1) The domain is configured as always on. * (2) When the domain has a subdomain being powered on. */ - if (genpd_is_always_on(genpd) || atomic_read(&genpd->sd_count) > 0) + if (genpd_is_always_on(genpd) || + genpd_is_rpm_always_on(genpd) || + atomic_read(&genpd->sd_count) > 0) return -EBUSY; list_for_each_entry(pdd, &genpd->dev_list, list_node) { @@ -1812,7 +1815,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, } /* Always-on domains must be powered on at initialization. */ - if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) + if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) && + !genpd_status_on(genpd)) return -EINVAL; if (genpd_is_cpu_domain(genpd) && diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index db779b650fce..85ff958e01f1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -340,11 +340,14 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) { + int cpu; + BUG_ON(irqs_disabled()); if (cpufreq_disabled()) return; + freqs->policy = policy; freqs->flags = cpufreq_driver->flags; pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); @@ -364,10 +367,8 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, } } - for_each_cpu(freqs->cpu, policy->cpus) { - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_PRECHANGE, freqs); - } + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_PRECHANGE, freqs); adjust_jiffies(CPUFREQ_PRECHANGE, freqs); break; @@ -377,11 +378,11 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, pr_debug("FREQ: %u - CPUs: %*pbl\n", freqs->new, cpumask_pr_args(policy->cpus)); - for_each_cpu(freqs->cpu, policy->cpus) { - trace_cpu_frequency(freqs->new, freqs->cpu); - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_POSTCHANGE, freqs); - } + for_each_cpu(cpu, policy->cpus) + trace_cpu_frequency(freqs->new, cpu); + + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_POSTCHANGE, freqs); cpufreq_stats_record_transition(policy, freqs->new); policy->cur = freqs->new; @@ -618,50 +619,52 @@ static struct cpufreq_governor *find_governor(const char *str_governor) return NULL; } +static int cpufreq_parse_policy(char *str_governor, + struct cpufreq_policy *policy) +{ + if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + return 0; + } + if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { + policy->policy = CPUFREQ_POLICY_POWERSAVE; + return 0; + } + return -EINVAL; +} + /** - * cpufreq_parse_governor - parse a governor string + * cpufreq_parse_governor - parse a governor string only for !setpolicy */ static int cpufreq_parse_governor(char *str_governor, struct cpufreq_policy *policy) { - if (cpufreq_driver->setpolicy) { - if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - return 0; - } + struct cpufreq_governor *t; - if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_POWERSAVE; - return 0; - } - } else { - struct cpufreq_governor *t; + mutex_lock(&cpufreq_governor_mutex); + + t = find_governor(str_governor); + if (!t) { + int ret; + + mutex_unlock(&cpufreq_governor_mutex); + + ret = request_module("cpufreq_%s", str_governor); + if (ret) + return -EINVAL; mutex_lock(&cpufreq_governor_mutex); t = find_governor(str_governor); - if (!t) { - int ret; + } + if (t && !try_module_get(t->owner)) + t = NULL; - mutex_unlock(&cpufreq_governor_mutex); + mutex_unlock(&cpufreq_governor_mutex); - ret = request_module("cpufreq_%s", str_governor); - if (ret) - return -EINVAL; - - mutex_lock(&cpufreq_governor_mutex); - - t = find_governor(str_governor); - } - if (t && !try_module_get(t->owner)) - t = NULL; - - mutex_unlock(&cpufreq_governor_mutex); - - if (t) { - policy->governor = t; - return 0; - } + if (t) { + policy->governor = t; + return 0; } return -EINVAL; @@ -783,8 +786,13 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, if (ret != 1) return -EINVAL; - if (cpufreq_parse_governor(str_governor, &new_policy)) - return -EINVAL; + if (cpufreq_driver->setpolicy) { + if (cpufreq_parse_policy(str_governor, &new_policy)) + return -EINVAL; + } else { + if (cpufreq_parse_governor(str_governor, &new_policy)) + return -EINVAL; + } ret = cpufreq_set_policy(policy, &new_policy); @@ -1050,32 +1058,39 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void) static int cpufreq_init_policy(struct cpufreq_policy *policy) { - struct cpufreq_governor *gov = NULL; + struct cpufreq_governor *gov = NULL, *def_gov = NULL; struct cpufreq_policy new_policy; memcpy(&new_policy, policy, sizeof(*policy)); - /* Update governor of new_policy to the governor used before hotplug */ - gov = find_governor(policy->last_governor); - if (gov) { - pr_debug("Restoring governor %s for cpu %d\n", + def_gov = cpufreq_default_governor(); + + if (has_target()) { + /* + * Update governor of new_policy to the governor used before + * hotplug + */ + gov = find_governor(policy->last_governor); + if (gov) { + pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); + } else { + if (!def_gov) + return -ENODATA; + gov = def_gov; + } + new_policy.governor = gov; } else { - gov = cpufreq_default_governor(); - if (!gov) - return -ENODATA; - } - - new_policy.governor = gov; - - /* Use the default policy if there is no last_policy. */ - if (cpufreq_driver->setpolicy) { - if (policy->last_policy) + /* Use the default policy if there is no last_policy. */ + if (policy->last_policy) { new_policy.policy = policy->last_policy; - else - cpufreq_parse_governor(gov->name, &new_policy); + } else { + if (!def_gov) + return -ENODATA; + cpufreq_parse_policy(def_gov->name, &new_policy); + } } - /* set default policy */ + return cpufreq_set_policy(policy, &new_policy); } @@ -1133,6 +1148,11 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) cpufreq_global_kobject, "policy%u", cpu); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + /* + * The entire policy object will be freed below, but the extra + * memory allocated for the kobject name needs to be freed by + * releasing the kobject. + */ kobject_put(&policy->kobj); goto err_free_real_cpus; } diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 7d14a4b4e82a..29b43651c261 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -431,10 +431,19 @@ static int imx_gpc_probe(struct platform_device *pdev) return ret; } - /* Disable PU power down in normal operation if ERR009619 is present */ + /* + * Disable PU power down by runtime PM if ERR009619 is present. + * + * The PRE clock will be paused for several cycles when turning on the + * PU domain LDO from power down state. If PRE is in use at that time, + * the IPU/PRG cannot get the correct display data from the PRE. + * + * This is not a concern when the whole system enters suspend state, so + * it's safe to power down PU in this case. + */ if (of_id_data->err009619_present) imx_gpc_domains[GPC_PGC_DOMAIN_PU].base.flags |= - GENPD_FLAG_ALWAYS_ON; + GENPD_FLAG_RPM_ALWAYS_ON; /* Keep DISP always on if ERR006287 is present */ if (of_id_data->err006287_present) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 684caf067003..d01a74fbc4db 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -42,13 +42,6 @@ enum cpufreq_table_sorting { CPUFREQ_TABLE_SORTED_DESCENDING }; -struct cpufreq_freqs { - unsigned int cpu; /* cpu nr */ - unsigned int old; - unsigned int new; - u8 flags; /* flags of cpufreq_driver, see below. */ -}; - struct cpufreq_cpuinfo { unsigned int max_freq; unsigned int min_freq; @@ -156,6 +149,13 @@ struct cpufreq_policy { struct thermal_cooling_device *cdev; }; +struct cpufreq_freqs { + struct cpufreq_policy *policy; + unsigned int old; + unsigned int new; + u8 flags; /* flags of cpufreq_driver, see below. */ +}; + /* Only for ACPI */ #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 0e8e356bed6a..b21f35f0ee2e 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -53,12 +53,16 @@ * driver must then comply with the so called, * last-man-standing algorithm, for the CPUs in the * PM domain. + * + * GENPD_FLAG_RPM_ALWAYS_ON: Instructs genpd to always keep the PM domain + * powered on except for system suspend. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) #define GENPD_FLAG_CPU_DOMAIN (1U << 4) +#define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */