From 289d72afddf83440117c35d864bf0c6309c1d011 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:08 +0530 Subject: [PATCH 01/24] thermal: cpu_cooling: Avoid accessing potentially freed structures After the lock is dropped, it is possible that the cpufreq_dev gets freed before we call get_level() and that can cause kernel to crash. Drop the lock after we are done using the structure. Cc: 4.2+ # 4.2+ Fixes: 02373d7c69b4 ("thermal: cpu_cooling: fix lockdep problems in cpu_cooling") Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 69d0f430b2d1..be29489dd247 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -153,8 +153,10 @@ unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) mutex_lock(&cooling_list_lock); list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { + unsigned long level = get_level(cpufreq_dev, freq); + mutex_unlock(&cooling_list_lock); - return get_level(cpufreq_dev, freq); + return level; } } mutex_unlock(&cooling_list_lock); From fb8ea3082169612933b5cd14f30415e81f926f7b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:09 +0530 Subject: [PATCH 02/24] thermal: cpu_cooling: rearrange globals Just to make it look better. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index be29489dd247..ce94aafed25d 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -105,8 +105,8 @@ struct cpufreq_cooling_device { struct device *cpu_dev; get_static_t plat_get_static_power; }; -static DEFINE_IDA(cpufreq_ida); +static DEFINE_IDA(cpufreq_ida); static DEFINE_MUTEX(cooling_list_lock); static LIST_HEAD(cpufreq_dev_list); From 1dea432a671aa87068c28194a6a602a6f358f749 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:10 +0530 Subject: [PATCH 03/24] thermal: cpu_cooling: Name cpufreq cooling devices as cpufreq_cdev Objects of "struct cpufreq_cooling_device" are named a bit inconsistently. Lets use cpufreq_cdev everywhere. Also note that the lists containing such devices is renamed similarly too. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 248 +++++++++++++++++----------------- 1 file changed, 124 insertions(+), 124 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index ce94aafed25d..80a46a80817b 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -108,27 +108,27 @@ struct cpufreq_cooling_device { static DEFINE_IDA(cpufreq_ida); static DEFINE_MUTEX(cooling_list_lock); -static LIST_HEAD(cpufreq_dev_list); +static LIST_HEAD(cpufreq_cdev_list); /* Below code defines functions to be used for cpufreq as cooling device */ /** * get_level: Find the level for a particular frequency - * @cpufreq_dev: cpufreq_dev for which the property is required + * @cpufreq_cdev: cpufreq_cdev for which the property is required * @freq: Frequency * * Return: level on success, THERMAL_CSTATE_INVALID on error. */ -static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev, +static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, unsigned int freq) { unsigned long level; - for (level = 0; level <= cpufreq_dev->max_level; level++) { - if (freq == cpufreq_dev->freq_table[level]) + for (level = 0; level <= cpufreq_cdev->max_level; level++) { + if (freq == cpufreq_cdev->freq_table[level]) return level; - if (freq > cpufreq_dev->freq_table[level]) + if (freq > cpufreq_cdev->freq_table[level]) break; } @@ -148,12 +148,12 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev, */ unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) { - struct cpufreq_cooling_device *cpufreq_dev; + struct cpufreq_cooling_device *cpufreq_cdev; mutex_lock(&cooling_list_lock); - list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { - if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { - unsigned long level = get_level(cpufreq_dev, freq); + list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) { + if (cpumask_test_cpu(cpu, &cpufreq_cdev->allowed_cpus)) { + unsigned long level = get_level(cpufreq_cdev, freq); mutex_unlock(&cooling_list_lock); return level; @@ -183,14 +183,14 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, { struct cpufreq_policy *policy = data; unsigned long clipped_freq; - struct cpufreq_cooling_device *cpufreq_dev; + struct cpufreq_cooling_device *cpufreq_cdev; if (event != CPUFREQ_ADJUST) return NOTIFY_DONE; mutex_lock(&cooling_list_lock); - list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { - if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus)) + list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) { + if (!cpumask_test_cpu(policy->cpu, &cpufreq_cdev->allowed_cpus)) continue; /* @@ -204,7 +204,7 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, * But, if clipped_freq is greater than policy->max, we don't * need to do anything. */ - clipped_freq = cpufreq_dev->clipped_freq; + clipped_freq = cpufreq_cdev->clipped_freq; if (policy->max > clipped_freq) cpufreq_verify_within_limits(policy, 0, clipped_freq); @@ -217,11 +217,11 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, /** * build_dyn_power_table() - create a dynamic power to frequency table - * @cpufreq_device: the cpufreq cooling device in which to store the table + * @cpufreq_cdev: the cpufreq cooling device in which to store the table * @capacitance: dynamic power coefficient for these cpus * * Build a dynamic power to frequency table for this cpu and store it - * in @cpufreq_device. This table will be used in cpu_power_to_freq() and + * in @cpufreq_cdev. This table will be used in cpu_power_to_freq() and * cpu_freq_to_power() to convert between power and frequency * efficiently. Power is stored in mW, frequency in KHz. The * resulting table is in ascending order. @@ -230,7 +230,7 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, * -ENOMEM if we run out of memory or -EAGAIN if an OPP was * added/enabled while the function was executing. */ -static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, +static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev, u32 capacitance) { struct power_table *power_table; @@ -239,10 +239,10 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, int num_opps = 0, cpu, i, ret = 0; unsigned long freq; - for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { + for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) { dev = get_cpu_device(cpu); if (!dev) { - dev_warn(&cpufreq_device->cool_dev->device, + dev_warn(&cpufreq_cdev->cool_dev->device, "No cpu device for cpu %d\n", cpu); continue; } @@ -295,9 +295,9 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, goto free_power_table; } - cpufreq_device->cpu_dev = dev; - cpufreq_device->dyn_power_table = power_table; - cpufreq_device->dyn_power_table_entries = i; + cpufreq_cdev->cpu_dev = dev; + cpufreq_cdev->dyn_power_table = power_table; + cpufreq_cdev->dyn_power_table_entries = i; return 0; @@ -307,26 +307,26 @@ free_power_table: return ret; } -static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device, +static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev, u32 freq) { int i; - struct power_table *pt = cpufreq_device->dyn_power_table; + struct power_table *pt = cpufreq_cdev->dyn_power_table; - for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) + for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++) if (freq < pt[i].frequency) break; return pt[i - 1].power; } -static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device, +static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev, u32 power) { int i; - struct power_table *pt = cpufreq_device->dyn_power_table; + struct power_table *pt = cpufreq_cdev->dyn_power_table; - for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) + for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++) if (power < pt[i].power) break; @@ -335,37 +335,37 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device, /** * get_load() - get load for a cpu since last updated - * @cpufreq_device: &struct cpufreq_cooling_device for this cpu + * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu * @cpu: cpu number - * @cpu_idx: index of the cpu in cpufreq_device->allowed_cpus + * @cpu_idx: index of the cpu in cpufreq_cdev->allowed_cpus * * Return: The average load of cpu @cpu in percentage since this * function was last called. */ -static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu, +static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu, int cpu_idx) { u32 load; u64 now, now_idle, delta_time, delta_idle; now_idle = get_cpu_idle_time(cpu, &now, 0); - delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx]; - delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx]; + delta_idle = now_idle - cpufreq_cdev->time_in_idle[cpu_idx]; + delta_time = now - cpufreq_cdev->time_in_idle_timestamp[cpu_idx]; if (delta_time <= delta_idle) load = 0; else load = div64_u64(100 * (delta_time - delta_idle), delta_time); - cpufreq_device->time_in_idle[cpu_idx] = now_idle; - cpufreq_device->time_in_idle_timestamp[cpu_idx] = now; + cpufreq_cdev->time_in_idle[cpu_idx] = now_idle; + cpufreq_cdev->time_in_idle_timestamp[cpu_idx] = now; return load; } /** * get_static_power() - calculate the static power consumed by the cpus - * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev + * @cpufreq_cdev: struct &cpufreq_cooling_device for this cpu cdev * @tz: thermal zone device in which we're operating * @freq: frequency in KHz * @power: pointer in which to store the calculated static power @@ -378,25 +378,24 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu, * * Return: 0 on success, -E* on failure. */ -static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, +static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev, struct thermal_zone_device *tz, unsigned long freq, u32 *power) { struct dev_pm_opp *opp; unsigned long voltage; - struct cpumask *cpumask = &cpufreq_device->allowed_cpus; + struct cpumask *cpumask = &cpufreq_cdev->allowed_cpus; unsigned long freq_hz = freq * 1000; - if (!cpufreq_device->plat_get_static_power || - !cpufreq_device->cpu_dev) { + if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) { *power = 0; return 0; } - opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz, + opp = dev_pm_opp_find_freq_exact(cpufreq_cdev->cpu_dev, freq_hz, true); if (IS_ERR(opp)) { - dev_warn_ratelimited(cpufreq_device->cpu_dev, + dev_warn_ratelimited(cpufreq_cdev->cpu_dev, "Failed to find OPP for frequency %lu: %ld\n", freq_hz, PTR_ERR(opp)); return -EINVAL; @@ -406,31 +405,31 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, dev_pm_opp_put(opp); if (voltage == 0) { - dev_err_ratelimited(cpufreq_device->cpu_dev, + dev_err_ratelimited(cpufreq_cdev->cpu_dev, "Failed to get voltage for frequency %lu\n", freq_hz); return -EINVAL; } - return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay, - voltage, power); + return cpufreq_cdev->plat_get_static_power(cpumask, tz->passive_delay, + voltage, power); } /** * get_dynamic_power() - calculate the dynamic power - * @cpufreq_device: &cpufreq_cooling_device for this cdev + * @cpufreq_cdev: &cpufreq_cooling_device for this cdev * @freq: current frequency * * Return: the dynamic power consumed by the cpus described by - * @cpufreq_device. + * @cpufreq_cdev. */ -static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device, +static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev, unsigned long freq) { u32 raw_cpu_power; - raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq); - return (raw_cpu_power * cpufreq_device->last_load) / 100; + raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq); + return (raw_cpu_power * cpufreq_cdev->last_load) / 100; } /* cpufreq cooling device callback functions are defined below */ @@ -448,9 +447,9 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device, static int cpufreq_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) { - struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; - *state = cpufreq_device->max_level; + *state = cpufreq_cdev->max_level; return 0; } @@ -467,9 +466,9 @@ static int cpufreq_get_max_state(struct thermal_cooling_device *cdev, static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { - struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; - *state = cpufreq_device->cpufreq_state; + *state = cpufreq_cdev->cpufreq_state; return 0; } @@ -487,21 +486,21 @@ static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev, static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) { - struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; - unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus); + struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; + unsigned int cpu = cpumask_any(&cpufreq_cdev->allowed_cpus); unsigned int clip_freq; /* Request state should be less than max_level */ - if (WARN_ON(state > cpufreq_device->max_level)) + if (WARN_ON(state > cpufreq_cdev->max_level)) return -EINVAL; /* Check if the old cooling action is same as new cooling action */ - if (cpufreq_device->cpufreq_state == state) + if (cpufreq_cdev->cpufreq_state == state) return 0; - clip_freq = cpufreq_device->freq_table[state]; - cpufreq_device->cpufreq_state = state; - cpufreq_device->clipped_freq = clip_freq; + clip_freq = cpufreq_cdev->freq_table[state]; + cpufreq_cdev->cpufreq_state = state; + cpufreq_cdev->clipped_freq = clip_freq; cpufreq_update_policy(cpu); @@ -538,10 +537,10 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, unsigned long freq; int i = 0, cpu, ret; u32 static_power, dynamic_power, total_load = 0; - struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; u32 *load_cpu = NULL; - cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); + cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask); /* * All the CPUs are offline, thus the requested power by @@ -555,16 +554,16 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, freq = cpufreq_quick_get(cpu); if (trace_thermal_power_cpu_get_power_enabled()) { - u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus); + u32 ncpus = cpumask_weight(&cpufreq_cdev->allowed_cpus); load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL); } - for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { + for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) { u32 load; if (cpu_online(cpu)) - load = get_load(cpufreq_device, cpu, i); + load = get_load(cpufreq_cdev, cpu, i); else load = 0; @@ -575,10 +574,10 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, i++; } - cpufreq_device->last_load = total_load; + cpufreq_cdev->last_load = total_load; - dynamic_power = get_dynamic_power(cpufreq_device, freq); - ret = get_static_power(cpufreq_device, tz, freq, &static_power); + dynamic_power = get_dynamic_power(cpufreq_cdev, freq); + ret = get_static_power(cpufreq_cdev, tz, freq, &static_power); if (ret) { kfree(load_cpu); return ret; @@ -586,7 +585,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, if (load_cpu) { trace_thermal_power_cpu_get_power( - &cpufreq_device->allowed_cpus, + &cpufreq_cdev->allowed_cpus, freq, load_cpu, i, dynamic_power, static_power); kfree(load_cpu); @@ -619,12 +618,12 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, cpumask_var_t cpumask; u32 static_power, dynamic_power; int ret; - struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) return -ENOMEM; - cpumask_and(cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask); + cpumask_and(cpumask, &cpufreq_cdev->allowed_cpus, cpu_online_mask); num_cpus = cpumask_weight(cpumask); /* None of our cpus are online, so no power */ @@ -634,14 +633,14 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, goto out; } - freq = cpufreq_device->freq_table[state]; + freq = cpufreq_cdev->freq_table[state]; if (!freq) { ret = -EINVAL; goto out; } - dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus; - ret = get_static_power(cpufreq_device, tz, freq, &static_power); + dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus; + ret = get_static_power(cpufreq_cdev, tz, freq, &static_power); if (ret) goto out; @@ -679,24 +678,24 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, int ret; s32 dyn_power; u32 last_load, normalised_power, static_power; - struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; - cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); + cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask); /* None of our cpus are online */ if (cpu >= nr_cpu_ids) return -ENODEV; cur_freq = cpufreq_quick_get(cpu); - ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power); + ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power); if (ret) return ret; dyn_power = power - static_power; dyn_power = dyn_power > 0 ? dyn_power : 0; - last_load = cpufreq_device->last_load ?: 1; + last_load = cpufreq_cdev->last_load ?: 1; normalised_power = (dyn_power * 100) / last_load; - target_freq = cpu_power_to_freq(cpufreq_device, normalised_power); + target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power); *state = cpufreq_cooling_get_level(cpu, target_freq); if (*state == THERMAL_CSTATE_INVALID) { @@ -706,7 +705,7 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, return -EINVAL; } - trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus, + trace_thermal_power_cpu_limit(&cpufreq_cdev->allowed_cpus, target_freq, *state, power); return 0; } @@ -771,7 +770,7 @@ __cpufreq_cooling_register(struct device_node *np, { struct cpufreq_policy *policy; struct thermal_cooling_device *cool_dev; - struct cpufreq_cooling_device *cpufreq_dev; + struct cpufreq_cooling_device *cpufreq_cdev; char dev_name[THERMAL_NAME_LENGTH]; struct cpufreq_frequency_table *pos, *table; cpumask_var_t temp_mask; @@ -798,49 +797,49 @@ __cpufreq_cooling_register(struct device_node *np, goto put_policy; } - cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL); - if (!cpufreq_dev) { + cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL); + if (!cpufreq_cdev) { cool_dev = ERR_PTR(-ENOMEM); goto put_policy; } num_cpus = cpumask_weight(clip_cpus); - cpufreq_dev->time_in_idle = kcalloc(num_cpus, - sizeof(*cpufreq_dev->time_in_idle), + cpufreq_cdev->time_in_idle = kcalloc(num_cpus, + sizeof(*cpufreq_cdev->time_in_idle), GFP_KERNEL); - if (!cpufreq_dev->time_in_idle) { + if (!cpufreq_cdev->time_in_idle) { cool_dev = ERR_PTR(-ENOMEM); goto free_cdev; } - cpufreq_dev->time_in_idle_timestamp = - kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp), + cpufreq_cdev->time_in_idle_timestamp = + kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle_timestamp), GFP_KERNEL); - if (!cpufreq_dev->time_in_idle_timestamp) { + if (!cpufreq_cdev->time_in_idle_timestamp) { cool_dev = ERR_PTR(-ENOMEM); goto free_time_in_idle; } /* Find max levels */ cpufreq_for_each_valid_entry(pos, table) - cpufreq_dev->max_level++; + cpufreq_cdev->max_level++; - cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) * - cpufreq_dev->max_level, GFP_KERNEL); - if (!cpufreq_dev->freq_table) { + cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * + cpufreq_cdev->max_level, GFP_KERNEL); + if (!cpufreq_cdev->freq_table) { cool_dev = ERR_PTR(-ENOMEM); goto free_time_in_idle_timestamp; } /* max_level is an index, not a counter */ - cpufreq_dev->max_level--; + cpufreq_cdev->max_level--; - cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus); + cpumask_copy(&cpufreq_cdev->allowed_cpus, clip_cpus); if (capacitance) { - cpufreq_dev->plat_get_static_power = plat_static_func; + cpufreq_cdev->plat_get_static_power = plat_static_func; - ret = build_dyn_power_table(cpufreq_dev, capacitance); + ret = build_dyn_power_table(cpufreq_cdev, capacitance); if (ret) { cool_dev = ERR_PTR(ret); goto free_table; @@ -856,12 +855,12 @@ __cpufreq_cooling_register(struct device_node *np, cool_dev = ERR_PTR(ret); goto free_power_table; } - cpufreq_dev->id = ret; + cpufreq_cdev->id = ret; /* Fill freq-table in descending order of frequencies */ - for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) { + for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) { freq = find_next_max(table, freq); - cpufreq_dev->freq_table[i] = freq; + cpufreq_cdev->freq_table[i] = freq; /* Warn for duplicate entries */ if (!freq) @@ -871,20 +870,21 @@ __cpufreq_cooling_register(struct device_node *np, } snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", - cpufreq_dev->id); + cpufreq_cdev->id); - cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, + cool_dev = thermal_of_cooling_device_register(np, dev_name, + cpufreq_cdev, cooling_ops); if (IS_ERR(cool_dev)) goto remove_ida; - cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0]; - cpufreq_dev->cool_dev = cool_dev; + cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0]; + cpufreq_cdev->cool_dev = cool_dev; mutex_lock(&cooling_list_lock); /* Register the notifier for first cpufreq cooling device */ - first = list_empty(&cpufreq_dev_list); - list_add(&cpufreq_dev->node, &cpufreq_dev_list); + first = list_empty(&cpufreq_cdev_list); + list_add(&cpufreq_cdev->node, &cpufreq_cdev_list); mutex_unlock(&cooling_list_lock); if (first) @@ -894,17 +894,17 @@ __cpufreq_cooling_register(struct device_node *np, goto put_policy; remove_ida: - ida_simple_remove(&cpufreq_ida, cpufreq_dev->id); + ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); free_power_table: - kfree(cpufreq_dev->dyn_power_table); + kfree(cpufreq_cdev->dyn_power_table); free_table: - kfree(cpufreq_dev->freq_table); + kfree(cpufreq_cdev->freq_table); free_time_in_idle_timestamp: - kfree(cpufreq_dev->time_in_idle_timestamp); + kfree(cpufreq_cdev->time_in_idle_timestamp); free_time_in_idle: - kfree(cpufreq_dev->time_in_idle); + kfree(cpufreq_cdev->time_in_idle); free_cdev: - kfree(cpufreq_dev); + kfree(cpufreq_cdev); put_policy: cpufreq_cpu_put(policy); free_cpumask: @@ -1029,30 +1029,30 @@ EXPORT_SYMBOL(of_cpufreq_power_cooling_register); */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { - struct cpufreq_cooling_device *cpufreq_dev; + struct cpufreq_cooling_device *cpufreq_cdev; bool last; if (!cdev) return; - cpufreq_dev = cdev->devdata; + cpufreq_cdev = cdev->devdata; mutex_lock(&cooling_list_lock); - list_del(&cpufreq_dev->node); + list_del(&cpufreq_cdev->node); /* Unregister the notifier for the last cpufreq cooling device */ - last = list_empty(&cpufreq_dev_list); + last = list_empty(&cpufreq_cdev_list); mutex_unlock(&cooling_list_lock); if (last) cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - thermal_cooling_device_unregister(cpufreq_dev->cool_dev); - ida_simple_remove(&cpufreq_ida, cpufreq_dev->id); - kfree(cpufreq_dev->dyn_power_table); - kfree(cpufreq_dev->time_in_idle_timestamp); - kfree(cpufreq_dev->time_in_idle); - kfree(cpufreq_dev->freq_table); - kfree(cpufreq_dev); + thermal_cooling_device_unregister(cpufreq_cdev->cool_dev); + ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); + kfree(cpufreq_cdev->dyn_power_table); + kfree(cpufreq_cdev->time_in_idle_timestamp); + kfree(cpufreq_cdev->time_in_idle); + kfree(cpufreq_cdev->freq_table); + kfree(cpufreq_cdev); } EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister); From 04bdbdf93cedc728ceff6af175fcce2bedfdd25f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:11 +0530 Subject: [PATCH 04/24] thermal: cpu_cooling: replace cool_dev with cdev Objects of "struct thermal_cooling_device" are named a bit inconsistently. Lets use cdev everywhere. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 37 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 80a46a80817b..f1e784c22c5a 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -65,7 +65,7 @@ struct power_table { * struct cpufreq_cooling_device - data for cooling device with cpufreq * @id: unique integer value corresponding to each cpufreq_cooling_device * registered. - * @cool_dev: thermal_cooling_device pointer to keep track of the + * @cdev: thermal_cooling_device pointer to keep track of the * registered cooling device. * @cpufreq_state: integer value representing the current state of cpufreq * cooling devices. @@ -90,7 +90,7 @@ struct power_table { */ struct cpufreq_cooling_device { int id; - struct thermal_cooling_device *cool_dev; + struct thermal_cooling_device *cdev; unsigned int cpufreq_state; unsigned int clipped_freq; unsigned int max_level; @@ -242,7 +242,7 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev, for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) { dev = get_cpu_device(cpu); if (!dev) { - dev_warn(&cpufreq_cdev->cool_dev->device, + dev_warn(&cpufreq_cdev->cdev->device, "No cpu device for cpu %d\n", cpu); continue; } @@ -769,7 +769,7 @@ __cpufreq_cooling_register(struct device_node *np, get_static_t plat_static_func) { struct cpufreq_policy *policy; - struct thermal_cooling_device *cool_dev; + struct thermal_cooling_device *cdev; struct cpufreq_cooling_device *cpufreq_cdev; char dev_name[THERMAL_NAME_LENGTH]; struct cpufreq_frequency_table *pos, *table; @@ -786,20 +786,20 @@ __cpufreq_cooling_register(struct device_node *np, policy = cpufreq_cpu_get(cpumask_first(temp_mask)); if (!policy) { pr_debug("%s: CPUFreq policy not found\n", __func__); - cool_dev = ERR_PTR(-EPROBE_DEFER); + cdev = ERR_PTR(-EPROBE_DEFER); goto free_cpumask; } table = policy->freq_table; if (!table) { pr_debug("%s: CPUFreq table not found\n", __func__); - cool_dev = ERR_PTR(-ENODEV); + cdev = ERR_PTR(-ENODEV); goto put_policy; } cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL); if (!cpufreq_cdev) { - cool_dev = ERR_PTR(-ENOMEM); + cdev = ERR_PTR(-ENOMEM); goto put_policy; } @@ -808,7 +808,7 @@ __cpufreq_cooling_register(struct device_node *np, sizeof(*cpufreq_cdev->time_in_idle), GFP_KERNEL); if (!cpufreq_cdev->time_in_idle) { - cool_dev = ERR_PTR(-ENOMEM); + cdev = ERR_PTR(-ENOMEM); goto free_cdev; } @@ -816,7 +816,7 @@ __cpufreq_cooling_register(struct device_node *np, kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle_timestamp), GFP_KERNEL); if (!cpufreq_cdev->time_in_idle_timestamp) { - cool_dev = ERR_PTR(-ENOMEM); + cdev = ERR_PTR(-ENOMEM); goto free_time_in_idle; } @@ -827,7 +827,7 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * cpufreq_cdev->max_level, GFP_KERNEL); if (!cpufreq_cdev->freq_table) { - cool_dev = ERR_PTR(-ENOMEM); + cdev = ERR_PTR(-ENOMEM); goto free_time_in_idle_timestamp; } @@ -841,7 +841,7 @@ __cpufreq_cooling_register(struct device_node *np, ret = build_dyn_power_table(cpufreq_cdev, capacitance); if (ret) { - cool_dev = ERR_PTR(ret); + cdev = ERR_PTR(ret); goto free_table; } @@ -852,7 +852,7 @@ __cpufreq_cooling_register(struct device_node *np, ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL); if (ret < 0) { - cool_dev = ERR_PTR(ret); + cdev = ERR_PTR(ret); goto free_power_table; } cpufreq_cdev->id = ret; @@ -872,14 +872,13 @@ __cpufreq_cooling_register(struct device_node *np, snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", cpufreq_cdev->id); - cool_dev = thermal_of_cooling_device_register(np, dev_name, - cpufreq_cdev, - cooling_ops); - if (IS_ERR(cool_dev)) + cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev, + cooling_ops); + if (IS_ERR(cdev)) goto remove_ida; cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0]; - cpufreq_cdev->cool_dev = cool_dev; + cpufreq_cdev->cdev = cdev; mutex_lock(&cooling_list_lock); /* Register the notifier for first cpufreq cooling device */ @@ -909,7 +908,7 @@ put_policy: cpufreq_cpu_put(policy); free_cpumask: free_cpumask_var(temp_mask); - return cool_dev; + return cdev; } /** @@ -1047,7 +1046,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - thermal_cooling_device_unregister(cpufreq_cdev->cool_dev); + thermal_cooling_device_unregister(cpufreq_cdev->cdev); ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); kfree(cpufreq_cdev->dyn_power_table); kfree(cpufreq_cdev->time_in_idle_timestamp); From 3e08b2df12983159ea2d9a1aa2b2bc601093b3cb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:12 +0530 Subject: [PATCH 05/24] thermal: cpu_cooling: remove cpufreq_cooling_get_level() There is only one user of cpufreq_cooling_get_level() and that already has pointer to the cpufreq_cdev structure. It can directly call get_level() instead and we can get rid of cpufreq_cooling_get_level(). Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 33 +-------------------------------- include/linux/cpu_cooling.h | 6 ------ 2 files changed, 1 insertion(+), 38 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index f1e784c22c5a..1f4b6a719d05 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -135,37 +135,6 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, return THERMAL_CSTATE_INVALID; } -/** - * cpufreq_cooling_get_level - for a given cpu, return the cooling level. - * @cpu: cpu for which the level is required - * @freq: the frequency of interest - * - * This function will match the cooling level corresponding to the - * requested @freq and return it. - * - * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID - * otherwise. - */ -unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) -{ - struct cpufreq_cooling_device *cpufreq_cdev; - - mutex_lock(&cooling_list_lock); - list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) { - if (cpumask_test_cpu(cpu, &cpufreq_cdev->allowed_cpus)) { - unsigned long level = get_level(cpufreq_cdev, freq); - - mutex_unlock(&cooling_list_lock); - return level; - } - } - mutex_unlock(&cooling_list_lock); - - pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu); - return THERMAL_CSTATE_INVALID; -} -EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level); - /** * cpufreq_thermal_notifier - notifier callback for cpufreq policy change. * @nb: struct notifier_block * with callback info. @@ -697,7 +666,7 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, normalised_power = (dyn_power * 100) / last_load; target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power); - *state = cpufreq_cooling_get_level(cpu, target_freq); + *state = get_level(cpufreq_cdev, target_freq); if (*state == THERMAL_CSTATE_INVALID) { dev_err_ratelimited(&cdev->device, "Failed to convert %dKHz for cpu %d into a cdev state\n", diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index c156f5082758..96c5e4c2f9c8 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -82,7 +82,6 @@ of_cpufreq_power_cooling_register(struct device_node *np, */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); -unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus) @@ -117,11 +116,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { return; } -static inline -unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) -{ - return THERMAL_CSTATE_INVALID; -} #endif /* CONFIG_CPU_THERMAL */ #endif /* __CPU_COOLING_H__ */ From 18f301c934db0f7bae4e7c12e941bce11d67aec5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:13 +0530 Subject: [PATCH 06/24] thermal: cpu_cooling: get rid of a variable in cpufreq_set_cur_state() 'cpu' is used at only one place and there is no need to keep a separate variable for it. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 1f4b6a719d05..002b48dc6bea 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -456,7 +456,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) { struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; - unsigned int cpu = cpumask_any(&cpufreq_cdev->allowed_cpus); unsigned int clip_freq; /* Request state should be less than max_level */ @@ -471,7 +470,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, cpufreq_cdev->cpufreq_state = state; cpufreq_cdev->clipped_freq = clip_freq; - cpufreq_update_policy(cpu); + cpufreq_update_policy(cpumask_any(&cpufreq_cdev->allowed_cpus)); return 0; } From 4d753aa7b6279e4b7d338947a434689962f430d1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:14 +0530 Subject: [PATCH 07/24] thermal: cpu_cooling: use cpufreq_policy to register cooling device The CPU cooling driver uses the cpufreq policy, to get clip_cpus, the frequency table, etc. Most of the callers of CPU cooling driver's registration routines have the cpufreq policy with them, but they only pass the policy->related_cpus cpumask. The __cpufreq_cooling_register() routine then gets the policy by itself and uses it. It would be much better if the callers can pass the policy instead directly. This also fixes a basic design flaw, where the policy can be freed while the CPU cooling driver is still active. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/cpufreq/arm_big_little.c | 2 +- drivers/cpufreq/cpufreq-dt.c | 2 +- drivers/cpufreq/dbx500-cpufreq.c | 2 +- drivers/cpufreq/mt8173-cpufreq.c | 4 +- drivers/cpufreq/qoriq-cpufreq.c | 3 +- drivers/thermal/cpu_cooling.c | 61 +++++++------------ drivers/thermal/imx_thermal.c | 22 +++++-- .../ti-soc-thermal/ti-thermal-common.c | 22 ++++--- include/linux/cpu_cooling.h | 26 ++++---- 9 files changed, 74 insertions(+), 70 deletions(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 418042201e6d..ea6d62547b10 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -540,7 +540,7 @@ static void bL_cpufreq_ready(struct cpufreq_policy *policy) &power_coefficient); cdev[cur_cluster] = of_cpufreq_power_cooling_register(np, - policy->related_cpus, power_coefficient, NULL); + policy, power_coefficient, NULL); if (IS_ERR(cdev[cur_cluster])) { dev_err(cpu_dev, "running cpufreq without cooling device: %ld\n", diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index c943787d761e..fef3c2160691 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -326,7 +326,7 @@ static void cpufreq_ready(struct cpufreq_policy *policy) &power_coefficient); priv->cdev = of_cpufreq_power_cooling_register(np, - policy->related_cpus, power_coefficient, NULL); + policy, power_coefficient, NULL); if (IS_ERR(priv->cdev)) { dev_err(priv->cpu_dev, "running cpufreq without cooling device: %ld\n", diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c index 3575b82210ba..4ee0431579c1 100644 --- a/drivers/cpufreq/dbx500-cpufreq.c +++ b/drivers/cpufreq/dbx500-cpufreq.c @@ -43,7 +43,7 @@ static int dbx500_cpufreq_exit(struct cpufreq_policy *policy) static void dbx500_cpufreq_ready(struct cpufreq_policy *policy) { - cdev = cpufreq_cooling_register(policy->cpus); + cdev = cpufreq_cooling_register(policy); if (IS_ERR(cdev)) pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev)); else diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index fd1886faf33a..f9f00fb4bc3a 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -320,9 +320,7 @@ static void mtk_cpufreq_ready(struct cpufreq_policy *policy) of_property_read_u32(np, DYNAMIC_POWER, &capacitance); info->cdev = of_cpufreq_power_cooling_register(np, - policy->related_cpus, - capacitance, - NULL); + policy, capacitance, NULL); if (IS_ERR(info->cdev)) { dev_err(info->cpu_dev, diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index e2ea433a5f9c..4ada55b8856e 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -278,8 +278,7 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy *policy) struct device_node *np = of_get_cpu_node(policy->cpu, NULL); if (of_find_property(np, "#cooling-cells", NULL)) { - cpud->cdev = of_cpufreq_cooling_register(np, - policy->related_cpus); + cpud->cdev = of_cpufreq_cooling_register(np, policy); if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) { pr_err("cpu%d is not running as cooling device: %ld\n", diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 002b48dc6bea..58e58065b650 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -717,7 +717,7 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table, /** * __cpufreq_cooling_register - helper function to create cpufreq cooling device * @np: a valid struct device_node to the cooling device device tree node - * @clip_cpus: cpumask of cpus where the frequency constraints will happen. + * @policy: cpufreq policy * Normally this should be same as cpufreq policy->related_cpus. * @capacitance: dynamic power coefficient for these cpus * @plat_static_func: function to calculate the static power consumed by these @@ -733,45 +733,34 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table, */ static struct thermal_cooling_device * __cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, u32 capacitance, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { - struct cpufreq_policy *policy; struct thermal_cooling_device *cdev; struct cpufreq_cooling_device *cpufreq_cdev; char dev_name[THERMAL_NAME_LENGTH]; struct cpufreq_frequency_table *pos, *table; - cpumask_var_t temp_mask; unsigned int freq, i, num_cpus; int ret; struct thermal_cooling_device_ops *cooling_ops; bool first; - if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL)) - return ERR_PTR(-ENOMEM); - - cpumask_and(temp_mask, clip_cpus, cpu_online_mask); - policy = cpufreq_cpu_get(cpumask_first(temp_mask)); - if (!policy) { - pr_debug("%s: CPUFreq policy not found\n", __func__); - cdev = ERR_PTR(-EPROBE_DEFER); - goto free_cpumask; + if (IS_ERR_OR_NULL(policy)) { + pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy); + return ERR_PTR(-EINVAL); } table = policy->freq_table; if (!table) { pr_debug("%s: CPUFreq table not found\n", __func__); - cdev = ERR_PTR(-ENODEV); - goto put_policy; + return ERR_PTR(-ENODEV); } cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL); - if (!cpufreq_cdev) { - cdev = ERR_PTR(-ENOMEM); - goto put_policy; - } + if (!cpufreq_cdev) + return ERR_PTR(-ENOMEM); - num_cpus = cpumask_weight(clip_cpus); + num_cpus = cpumask_weight(policy->related_cpus); cpufreq_cdev->time_in_idle = kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle), GFP_KERNEL); @@ -802,7 +791,7 @@ __cpufreq_cooling_register(struct device_node *np, /* max_level is an index, not a counter */ cpufreq_cdev->max_level--; - cpumask_copy(&cpufreq_cdev->allowed_cpus, clip_cpus); + cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus); if (capacitance) { cpufreq_cdev->plat_get_static_power = plat_static_func; @@ -858,7 +847,7 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_register_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - goto put_policy; + return cdev; remove_ida: ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); @@ -872,16 +861,12 @@ free_time_in_idle: kfree(cpufreq_cdev->time_in_idle); free_cdev: kfree(cpufreq_cdev); -put_policy: - cpufreq_cpu_put(policy); -free_cpumask: - free_cpumask_var(temp_mask); return cdev; } /** * cpufreq_cooling_register - function to create cpufreq cooling device. - * @clip_cpus: cpumask of cpus where the frequency constraints will happen. + * @policy: cpufreq policy * * This interface function registers the cpufreq cooling device with the name * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq @@ -891,16 +876,16 @@ free_cpumask: * on failure, it returns a corresponding ERR_PTR(). */ struct thermal_cooling_device * -cpufreq_cooling_register(const struct cpumask *clip_cpus) +cpufreq_cooling_register(struct cpufreq_policy *policy) { - return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL); + return __cpufreq_cooling_register(NULL, policy, 0, NULL); } EXPORT_SYMBOL_GPL(cpufreq_cooling_register); /** * of_cpufreq_cooling_register - function to create cpufreq cooling device. * @np: a valid struct device_node to the cooling device device tree node - * @clip_cpus: cpumask of cpus where the frequency constraints will happen. + * @policy: cpufreq policy * * This interface function registers the cpufreq cooling device with the name * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq @@ -912,18 +897,18 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register); */ struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) { if (!np) return ERR_PTR(-EINVAL); - return __cpufreq_cooling_register(np, clip_cpus, 0, NULL); + return __cpufreq_cooling_register(np, policy, 0, NULL); } EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); /** * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy * @capacitance: dynamic power coefficient for these cpus * @plat_static_func: function to calculate the static power consumed by these * cpus (optional) @@ -943,10 +928,10 @@ EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); * on failure, it returns a corresponding ERR_PTR(). */ struct thermal_cooling_device * -cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance, +cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { - return __cpufreq_cooling_register(NULL, clip_cpus, capacitance, + return __cpufreq_cooling_register(NULL, policy, capacitance, plat_static_func); } EXPORT_SYMBOL(cpufreq_power_cooling_register); @@ -954,7 +939,7 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register); /** * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions * @np: a valid struct device_node to the cooling device device tree node - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy * @capacitance: dynamic power coefficient for these cpus * @plat_static_func: function to calculate the static power consumed by these * cpus (optional) @@ -976,14 +961,14 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register); */ struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { if (!np) return ERR_PTR(-EINVAL); - return __cpufreq_cooling_register(np, clip_cpus, capacitance, + return __cpufreq_cooling_register(np, policy, capacitance, plat_static_func); } EXPORT_SYMBOL(of_cpufreq_power_cooling_register); diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index fb648a45754e..f7ec39f46ee4 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -88,6 +89,7 @@ static struct thermal_soc_data thermal_imx6sx_data = { }; struct imx_thermal_data { + struct cpufreq_policy *policy; struct thermal_zone_device *tz; struct thermal_cooling_device *cdev; enum thermal_device_mode mode; @@ -525,13 +527,18 @@ static int imx_thermal_probe(struct platform_device *pdev) regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF); regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN); - data->cdev = cpufreq_cooling_register(cpu_present_mask); + data->policy = cpufreq_cpu_get(0); + if (!data->policy) { + pr_debug("%s: CPUFreq policy not found\n", __func__); + return -EPROBE_DEFER; + } + + data->cdev = cpufreq_cooling_register(data->policy); if (IS_ERR(data->cdev)) { ret = PTR_ERR(data->cdev); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "failed to register cpufreq cooling device: %d\n", - ret); + dev_err(&pdev->dev, + "failed to register cpufreq cooling device: %d\n", ret); + cpufreq_cpu_put(data->policy); return ret; } @@ -542,6 +549,7 @@ static int imx_thermal_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to get thermal clk: %d\n", ret); cpufreq_cooling_unregister(data->cdev); + cpufreq_cpu_put(data->policy); return ret; } @@ -556,6 +564,7 @@ static int imx_thermal_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret); cpufreq_cooling_unregister(data->cdev); + cpufreq_cpu_put(data->policy); return ret; } @@ -571,6 +580,7 @@ static int imx_thermal_probe(struct platform_device *pdev) "failed to register thermal zone device %d\n", ret); clk_disable_unprepare(data->thermal_clk); cpufreq_cooling_unregister(data->cdev); + cpufreq_cpu_put(data->policy); return ret; } @@ -599,6 +609,7 @@ static int imx_thermal_probe(struct platform_device *pdev) clk_disable_unprepare(data->thermal_clk); thermal_zone_device_unregister(data->tz); cpufreq_cooling_unregister(data->cdev); + cpufreq_cpu_put(data->policy); return ret; } @@ -620,6 +631,7 @@ static int imx_thermal_remove(struct platform_device *pdev) thermal_zone_device_unregister(data->tz); cpufreq_cooling_unregister(data->cdev); + cpufreq_cpu_put(data->policy); return 0; } diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 02790f69e26c..c211a8e4a210 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ /* common data structures */ struct ti_thermal_data { + struct cpufreq_policy *policy; struct thermal_zone_device *ti_thermal; struct thermal_zone_device *pcb_tz; struct thermal_cooling_device *cool_dev; @@ -247,15 +249,19 @@ int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id) if (!data) return -EINVAL; + data->policy = cpufreq_cpu_get(0); + if (!data->policy) { + pr_debug("%s: CPUFreq policy not found\n", __func__); + return -EPROBE_DEFER; + } + /* Register cooling device */ - data->cool_dev = cpufreq_cooling_register(cpu_present_mask); + data->cool_dev = cpufreq_cooling_register(data->policy); if (IS_ERR(data->cool_dev)) { int ret = PTR_ERR(data->cool_dev); - - if (ret != -EPROBE_DEFER) - dev_err(bgp->dev, - "Failed to register cpu cooling device %d\n", - ret); + dev_err(bgp->dev, "Failed to register cpu cooling device %d\n", + ret); + cpufreq_cpu_put(data->policy); return ret; } @@ -270,8 +276,10 @@ int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id) data = ti_bandgap_get_sensor_data(bgp, id); - if (data) + if (data) { cpufreq_cooling_unregister(data->cool_dev); + cpufreq_cpu_put(data->policy); + } return 0; } diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 96c5e4c2f9c8..d4292ebc5c8b 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -28,47 +28,49 @@ #include #include +struct cpufreq_policy; + typedef int (*get_static_t)(cpumask_t *cpumask, int interval, unsigned long voltage, u32 *power); #ifdef CONFIG_CPU_THERMAL /** * cpufreq_cooling_register - function to create cpufreq cooling device. - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy. */ struct thermal_cooling_device * -cpufreq_cooling_register(const struct cpumask *clip_cpus); +cpufreq_cooling_register(struct cpufreq_policy *policy); struct thermal_cooling_device * -cpufreq_power_cooling_register(const struct cpumask *clip_cpus, +cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func); /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @np: a valid struct device_node to the cooling device device tree node. - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy. */ #ifdef CONFIG_THERMAL_OF struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus); + struct cpufreq_policy *policy); struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func); #else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { @@ -84,12 +86,12 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * -cpufreq_cooling_register(const struct cpumask *clip_cpus) +cpufreq_cooling_register(struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * -cpufreq_power_cooling_register(const struct cpumask *clip_cpus, +cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { return NULL; @@ -97,14 +99,14 @@ cpufreq_power_cooling_register(const struct cpumask *clip_cpus, static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { From 55d852931319d2e3ccde86cd426405231ce6c6ac Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:15 +0530 Subject: [PATCH 08/24] cpufreq: create cpufreq_table_count_valid_entries() We need such a routine at two places already, lets create one. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/cpufreq/cpufreq_stats.c | 13 ++++--------- drivers/thermal/cpu_cooling.c | 22 +++++++++------------- include/linux/cpufreq.h | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index f570ead62454..9c3d319dc129 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -170,11 +170,10 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) unsigned int i = 0, count = 0, ret = -ENOMEM; struct cpufreq_stats *stats; unsigned int alloc_size; - struct cpufreq_frequency_table *pos, *table; + struct cpufreq_frequency_table *pos; - /* We need cpufreq table for creating stats table */ - table = policy->freq_table; - if (unlikely(!table)) + count = cpufreq_table_count_valid_entries(policy); + if (!count) return; /* stats already initialized */ @@ -185,10 +184,6 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) if (!stats) return; - /* Find total allocation size */ - cpufreq_for_each_valid_entry(pos, table) - count++; - alloc_size = count * sizeof(int) + count * sizeof(u64); alloc_size += count * count * sizeof(int); @@ -205,7 +200,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) stats->max_state = count; /* Find valid-unique entries */ - cpufreq_for_each_valid_entry(pos, table) + cpufreq_for_each_valid_entry(pos, policy->freq_table) if (freq_table_get_index(stats, pos->frequency) == -1) stats->freq_table[i++] = pos->frequency; diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 58e58065b650..55ff45c1e917 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -739,7 +739,6 @@ __cpufreq_cooling_register(struct device_node *np, struct thermal_cooling_device *cdev; struct cpufreq_cooling_device *cpufreq_cdev; char dev_name[THERMAL_NAME_LENGTH]; - struct cpufreq_frequency_table *pos, *table; unsigned int freq, i, num_cpus; int ret; struct thermal_cooling_device_ops *cooling_ops; @@ -750,9 +749,10 @@ __cpufreq_cooling_register(struct device_node *np, return ERR_PTR(-EINVAL); } - table = policy->freq_table; - if (!table) { - pr_debug("%s: CPUFreq table not found\n", __func__); + i = cpufreq_table_count_valid_entries(policy); + if (!i) { + pr_debug("%s: CPUFreq table not found or has no valid entries\n", + __func__); return ERR_PTR(-ENODEV); } @@ -777,20 +777,16 @@ __cpufreq_cooling_register(struct device_node *np, goto free_time_in_idle; } - /* Find max levels */ - cpufreq_for_each_valid_entry(pos, table) - cpufreq_cdev->max_level++; + /* max_level is an index, not a counter */ + cpufreq_cdev->max_level = i - 1; - cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * - cpufreq_cdev->max_level, GFP_KERNEL); + cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i, + GFP_KERNEL); if (!cpufreq_cdev->freq_table) { cdev = ERR_PTR(-ENOMEM); goto free_time_in_idle_timestamp; } - /* max_level is an index, not a counter */ - cpufreq_cdev->max_level--; - cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus); if (capacitance) { @@ -816,7 +812,7 @@ __cpufreq_cooling_register(struct device_node *np, /* Fill freq-table in descending order of frequencies */ for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) { - freq = find_next_max(table, freq); + freq = find_next_max(policy->freq_table, freq); cpufreq_cdev->freq_table[i] = freq; /* Warn for duplicate entries */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a5ce0bbeadb5..eb9abfadaeac 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -862,6 +862,20 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, return -EINVAL; } } + +static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *pos; + int count = 0; + + if (unlikely(!policy->freq_table)) + return 0; + + cpufreq_for_each_valid_entry(pos, policy->freq_table) + count++; + + return count; +} #else static inline int cpufreq_boost_trigger_state(int state) { From b12b6519496bb07b0845ed50eee1ca75c3b7fa81 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:16 +0530 Subject: [PATCH 09/24] thermal: cpu_cooling: store cpufreq policy The cpufreq policy can be used by the cpu_cooling driver, lets store it in the cpufreq_cooling_device structure. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 55ff45c1e917..7dddc7443f5d 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -67,6 +67,7 @@ struct power_table { * registered. * @cdev: thermal_cooling_device pointer to keep track of the * registered cooling device. + * @policy: cpufreq policy. * @cpufreq_state: integer value representing the current state of cpufreq * cooling devices. * @clipped_freq: integer value representing the absolute value of the clipped @@ -91,6 +92,7 @@ struct power_table { struct cpufreq_cooling_device { int id; struct thermal_cooling_device *cdev; + struct cpufreq_policy *policy; unsigned int cpufreq_state; unsigned int clipped_freq; unsigned int max_level; @@ -760,6 +762,7 @@ __cpufreq_cooling_register(struct device_node *np, if (!cpufreq_cdev) return ERR_PTR(-ENOMEM); + cpufreq_cdev->policy = policy; num_cpus = cpumask_weight(policy->related_cpus); cpufreq_cdev->time_in_idle = kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle), From 02bacb21c6575ce408645ddb22fef44940d10257 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:17 +0530 Subject: [PATCH 10/24] thermal: cpu_cooling: OPPs are registered for all CPUs The OPPs are registered for all CPUs of a cpufreq policy now and we don't need to run the loop in build_dyn_power_table(). Just check for the policy->cpu and we should be fine. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 7dddc7443f5d..ce387f62c93e 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -83,7 +83,7 @@ struct power_table { * @dyn_power_table: array of struct power_table for frequency to power * conversion, sorted in ascending order. * @dyn_power_table_entries: number of entries in the @dyn_power_table array - * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered + * @cpu_dev: the cpu_device of policy->cpu. * @plat_get_static_power: callback to calculate the static power * * This structure is required for keeping information of each registered @@ -207,24 +207,20 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev, struct power_table *power_table; struct dev_pm_opp *opp; struct device *dev = NULL; - int num_opps = 0, cpu, i, ret = 0; + int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i, ret = 0; unsigned long freq; - for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) { - dev = get_cpu_device(cpu); - if (!dev) { - dev_warn(&cpufreq_cdev->cdev->device, - "No cpu device for cpu %d\n", cpu); - continue; - } - - num_opps = dev_pm_opp_get_opp_count(dev); - if (num_opps > 0) - break; - else if (num_opps < 0) - return num_opps; + dev = get_cpu_device(cpu); + if (unlikely(!dev)) { + dev_warn(&cpufreq_cdev->cdev->device, + "No cpu device for cpu %d\n", cpu); + return -ENODEV; } + num_opps = dev_pm_opp_get_opp_count(dev); + if (num_opps < 0) + return num_opps; + if (num_opps == 0) return -EINVAL; From ba76dd9ddd4046ece873444c2256e09afbd5d32e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:18 +0530 Subject: [PATCH 11/24] thermal: cpu_cooling: get rid of 'allowed_cpus' 'allowed_cpus' is a copy of policy->related_cpus and can be replaced by it directly. At some places we are only concerned about online CPUs and policy->cpus can be used there. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 81 +++++++++++------------------------ 1 file changed, 25 insertions(+), 56 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index ce387f62c93e..94e7121817bf 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -74,7 +74,6 @@ struct power_table { * frequency. * @max_level: maximum cooling level. One less than total number of valid * cpufreq frequencies. - * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device. * @node: list_head to link all cpufreq_cooling_device together. * @last_load: load measured by the latest call to cpufreq_get_requested_power() * @time_in_idle: previous reading of the absolute time that this cpu was idle @@ -97,7 +96,6 @@ struct cpufreq_cooling_device { unsigned int clipped_freq; unsigned int max_level; unsigned int *freq_table; /* In descending order */ - struct cpumask allowed_cpus; struct list_head node; u32 last_load; u64 *time_in_idle; @@ -161,7 +159,11 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, mutex_lock(&cooling_list_lock); list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) { - if (!cpumask_test_cpu(policy->cpu, &cpufreq_cdev->allowed_cpus)) + /* + * A new copy of the policy is sent to the notifier and can't + * compare that directly. + */ + if (policy->cpu != cpufreq_cdev->policy->cpu) continue; /* @@ -304,7 +306,7 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev, * get_load() - get load for a cpu since last updated * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu * @cpu: cpu number - * @cpu_idx: index of the cpu in cpufreq_cdev->allowed_cpus + * @cpu_idx: index of the cpu in time_in_idle* * * Return: The average load of cpu @cpu in percentage since this * function was last called. @@ -351,7 +353,7 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev, { struct dev_pm_opp *opp; unsigned long voltage; - struct cpumask *cpumask = &cpufreq_cdev->allowed_cpus; + struct cpumask *cpumask = cpufreq_cdev->policy->related_cpus; unsigned long freq_hz = freq * 1000; if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) { @@ -468,7 +470,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, cpufreq_cdev->cpufreq_state = state; cpufreq_cdev->clipped_freq = clip_freq; - cpufreq_update_policy(cpumask_any(&cpufreq_cdev->allowed_cpus)); + cpufreq_update_policy(cpufreq_cdev->policy->cpu); return 0; } @@ -504,28 +506,18 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, int i = 0, cpu, ret; u32 static_power, dynamic_power, total_load = 0; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; + struct cpufreq_policy *policy = cpufreq_cdev->policy; u32 *load_cpu = NULL; - cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask); - - /* - * All the CPUs are offline, thus the requested power by - * the cdev is 0 - */ - if (cpu >= nr_cpu_ids) { - *power = 0; - return 0; - } - - freq = cpufreq_quick_get(cpu); + freq = cpufreq_quick_get(policy->cpu); if (trace_thermal_power_cpu_get_power_enabled()) { - u32 ncpus = cpumask_weight(&cpufreq_cdev->allowed_cpus); + u32 ncpus = cpumask_weight(policy->related_cpus); load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL); } - for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) { + for_each_cpu(cpu, policy->related_cpus) { u32 load; if (cpu_online(cpu)) @@ -550,9 +542,9 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, } if (load_cpu) { - trace_thermal_power_cpu_get_power( - &cpufreq_cdev->allowed_cpus, - freq, load_cpu, i, dynamic_power, static_power); + trace_thermal_power_cpu_get_power(policy->related_cpus, freq, + load_cpu, i, dynamic_power, + static_power); kfree(load_cpu); } @@ -581,38 +573,22 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, unsigned long state, u32 *power) { unsigned int freq, num_cpus; - cpumask_var_t cpumask; u32 static_power, dynamic_power; int ret; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; - if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_and(cpumask, &cpufreq_cdev->allowed_cpus, cpu_online_mask); - num_cpus = cpumask_weight(cpumask); - - /* None of our cpus are online, so no power */ - if (num_cpus == 0) { - *power = 0; - ret = 0; - goto out; - } + num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus); freq = cpufreq_cdev->freq_table[state]; - if (!freq) { - ret = -EINVAL; - goto out; - } + if (!freq) + return -EINVAL; dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus; ret = get_static_power(cpufreq_cdev, tz, freq, &static_power); if (ret) - goto out; + return ret; *power = static_power + dynamic_power; -out: - free_cpumask_var(cpumask); return ret; } @@ -640,19 +616,14 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, struct thermal_zone_device *tz, u32 power, unsigned long *state) { - unsigned int cpu, cur_freq, target_freq; + unsigned int cur_freq, target_freq; int ret; s32 dyn_power; u32 last_load, normalised_power, static_power; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; + struct cpufreq_policy *policy = cpufreq_cdev->policy; - cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask); - - /* None of our cpus are online */ - if (cpu >= nr_cpu_ids) - return -ENODEV; - - cur_freq = cpufreq_quick_get(cpu); + cur_freq = cpufreq_quick_get(policy->cpu); ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power); if (ret) return ret; @@ -667,12 +638,12 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, if (*state == THERMAL_CSTATE_INVALID) { dev_err_ratelimited(&cdev->device, "Failed to convert %dKHz for cpu %d into a cdev state\n", - target_freq, cpu); + target_freq, policy->cpu); return -EINVAL; } - trace_thermal_power_cpu_limit(&cpufreq_cdev->allowed_cpus, - target_freq, *state, power); + trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state, + power); return 0; } @@ -786,8 +757,6 @@ __cpufreq_cooling_register(struct device_node *np, goto free_time_in_idle_timestamp; } - cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus); - if (capacitance) { cpufreq_cdev->plat_get_static_power = plat_static_func; From 349d39dc57396e3e9f39170905ff8d626eea9e44 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:19 +0530 Subject: [PATCH 12/24] thermal: cpu_cooling: merge frequency and power tables The cpu_cooling driver keeps two tables: - freq_table: table of frequencies in descending order, built from policy->freq_table. - power_table: table of frequencies and power in ascending order, built from OPP table. If the OPPs are used for the CPU device then both these tables are actually built using the OPP core and should have the same frequency entries. And there is no need to keep separate tables for this. Lets merge them both. Note that the new table is in descending order of frequencies and so the 'for' loops were required to be fixed at few places to make it work. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 153 +++++++++++++++------------------- 1 file changed, 67 insertions(+), 86 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 94e7121817bf..67ec52d5f7fc 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -49,14 +49,14 @@ */ /** - * struct power_table - frequency to power conversion + * struct freq_table - frequency table along with power entries * @frequency: frequency in KHz * @power: power in mW * * This structure is built when the cooling device registers and helps - * in translating frequency to power and viceversa. + * in translating frequency to power and vice versa. */ -struct power_table { +struct freq_table { u32 frequency; u32 power; }; @@ -79,9 +79,6 @@ struct power_table { * @time_in_idle: previous reading of the absolute time that this cpu was idle * @time_in_idle_timestamp: wall time of the last invocation of * get_cpu_idle_time_us() - * @dyn_power_table: array of struct power_table for frequency to power - * conversion, sorted in ascending order. - * @dyn_power_table_entries: number of entries in the @dyn_power_table array * @cpu_dev: the cpu_device of policy->cpu. * @plat_get_static_power: callback to calculate the static power * @@ -95,13 +92,11 @@ struct cpufreq_cooling_device { unsigned int cpufreq_state; unsigned int clipped_freq; unsigned int max_level; - unsigned int *freq_table; /* In descending order */ + struct freq_table *freq_table; /* In descending order */ struct list_head node; u32 last_load; u64 *time_in_idle; u64 *time_in_idle_timestamp; - struct power_table *dyn_power_table; - int dyn_power_table_entries; struct device *cpu_dev; get_static_t plat_get_static_power; }; @@ -125,10 +120,10 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, unsigned long level; for (level = 0; level <= cpufreq_cdev->max_level; level++) { - if (freq == cpufreq_cdev->freq_table[level]) + if (freq == cpufreq_cdev->freq_table[level].frequency) return level; - if (freq > cpufreq_cdev->freq_table[level]) + if (freq > cpufreq_cdev->freq_table[level].frequency) break; } @@ -189,28 +184,25 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, } /** - * build_dyn_power_table() - create a dynamic power to frequency table - * @cpufreq_cdev: the cpufreq cooling device in which to store the table + * update_freq_table() - Update the freq table with power numbers + * @cpufreq_cdev: the cpufreq cooling device in which to update the table * @capacitance: dynamic power coefficient for these cpus * - * Build a dynamic power to frequency table for this cpu and store it - * in @cpufreq_cdev. This table will be used in cpu_power_to_freq() and - * cpu_freq_to_power() to convert between power and frequency - * efficiently. Power is stored in mW, frequency in KHz. The - * resulting table is in ascending order. + * Update the freq table with power numbers. This table will be used in + * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and + * frequency efficiently. Power is stored in mW, frequency in KHz. The + * resulting table is in descending order. * * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs, - * -ENOMEM if we run out of memory or -EAGAIN if an OPP was - * added/enabled while the function was executing. + * or -ENOMEM if we run out of memory. */ -static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev, - u32 capacitance) +static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev, + u32 capacitance) { - struct power_table *power_table; + struct freq_table *freq_table = cpufreq_cdev->freq_table; struct dev_pm_opp *opp; struct device *dev = NULL; - int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i, ret = 0; - unsigned long freq; + int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i; dev = get_cpu_device(cpu); if (unlikely(!dev)) { @@ -223,25 +215,32 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev, if (num_opps < 0) return num_opps; - if (num_opps == 0) + /* + * The cpufreq table is also built from the OPP table and so the count + * should match. + */ + if (num_opps != cpufreq_cdev->max_level + 1) { + dev_warn(dev, "Number of OPPs not matching with max_levels\n"); return -EINVAL; + } - power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL); - if (!power_table) - return -ENOMEM; - - for (freq = 0, i = 0; - opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp); - freq++, i++) { - u32 freq_mhz, voltage_mv; + for (i = 0; i <= cpufreq_cdev->max_level; i++) { + unsigned long freq = freq_table[i].frequency * 1000; + u32 freq_mhz = freq_table[i].frequency / 1000; u64 power; + u32 voltage_mv; - if (i >= num_opps) { - ret = -EAGAIN; - goto free_power_table; + /* + * Find ceil frequency as 'freq' may be slightly lower than OPP + * freq due to truncation while converting to kHz. + */ + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + if (IS_ERR(opp)) { + dev_err(dev, "failed to get opp for %lu frequency\n", + freq); + return -EINVAL; } - freq_mhz = freq / 1000000; voltage_mv = dev_pm_opp_get_voltage(opp) / 1000; dev_pm_opp_put(opp); @@ -252,54 +251,39 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev, power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv; do_div(power, 1000000000); - /* frequency is stored in power_table in KHz */ - power_table[i].frequency = freq / 1000; - /* power is stored in mW */ - power_table[i].power = power; - } - - if (i != num_opps) { - ret = PTR_ERR(opp); - goto free_power_table; + freq_table[i].power = power; } cpufreq_cdev->cpu_dev = dev; - cpufreq_cdev->dyn_power_table = power_table; - cpufreq_cdev->dyn_power_table_entries = i; return 0; - -free_power_table: - kfree(power_table); - - return ret; } static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev, u32 freq) { int i; - struct power_table *pt = cpufreq_cdev->dyn_power_table; + struct freq_table *freq_table = cpufreq_cdev->freq_table; - for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++) - if (freq < pt[i].frequency) + for (i = 1; i <= cpufreq_cdev->max_level; i++) + if (freq > freq_table[i].frequency) break; - return pt[i - 1].power; + return freq_table[i - 1].power; } static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev, u32 power) { int i; - struct power_table *pt = cpufreq_cdev->dyn_power_table; + struct freq_table *freq_table = cpufreq_cdev->freq_table; - for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++) - if (power < pt[i].power) + for (i = 1; i <= cpufreq_cdev->max_level; i++) + if (power > freq_table[i].power) break; - return pt[i - 1].frequency; + return freq_table[i - 1].frequency; } /** @@ -466,7 +450,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, if (cpufreq_cdev->cpufreq_state == state) return 0; - clip_freq = cpufreq_cdev->freq_table[state]; + clip_freq = cpufreq_cdev->freq_table[state].frequency; cpufreq_cdev->cpufreq_state = state; cpufreq_cdev->clipped_freq = clip_freq; @@ -579,7 +563,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus); - freq = cpufreq_cdev->freq_table[state]; + freq = cpufreq_cdev->freq_table[state].frequency; if (!freq) return -EINVAL; @@ -757,31 +741,20 @@ __cpufreq_cooling_register(struct device_node *np, goto free_time_in_idle_timestamp; } - if (capacitance) { - cpufreq_cdev->plat_get_static_power = plat_static_func; - - ret = build_dyn_power_table(cpufreq_cdev, capacitance); - if (ret) { - cdev = ERR_PTR(ret); - goto free_table; - } - - cooling_ops = &cpufreq_power_cooling_ops; - } else { - cooling_ops = &cpufreq_cooling_ops; - } - ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL); if (ret < 0) { cdev = ERR_PTR(ret); - goto free_power_table; + goto free_table; } cpufreq_cdev->id = ret; + snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", + cpufreq_cdev->id); + /* Fill freq-table in descending order of frequencies */ for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) { freq = find_next_max(policy->freq_table, freq); - cpufreq_cdev->freq_table[i] = freq; + cpufreq_cdev->freq_table[i].frequency = freq; /* Warn for duplicate entries */ if (!freq) @@ -790,15 +763,26 @@ __cpufreq_cooling_register(struct device_node *np, pr_debug("%s: freq:%u KHz\n", __func__, freq); } - snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", - cpufreq_cdev->id); + if (capacitance) { + cpufreq_cdev->plat_get_static_power = plat_static_func; + + ret = update_freq_table(cpufreq_cdev, capacitance); + if (ret) { + cdev = ERR_PTR(ret); + goto remove_ida; + } + + cooling_ops = &cpufreq_power_cooling_ops; + } else { + cooling_ops = &cpufreq_cooling_ops; + } cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev, cooling_ops); if (IS_ERR(cdev)) goto remove_ida; - cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0]; + cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency; cpufreq_cdev->cdev = cdev; mutex_lock(&cooling_list_lock); @@ -815,8 +799,6 @@ __cpufreq_cooling_register(struct device_node *np, remove_ida: ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); -free_power_table: - kfree(cpufreq_cdev->dyn_power_table); free_table: kfree(cpufreq_cdev->freq_table); free_time_in_idle_timestamp: @@ -965,7 +947,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) thermal_cooling_device_unregister(cpufreq_cdev->cdev); ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); - kfree(cpufreq_cdev->dyn_power_table); kfree(cpufreq_cdev->time_in_idle_timestamp); kfree(cpufreq_cdev->time_in_idle); kfree(cpufreq_cdev->freq_table); From 81ee14da1051ee87c243b9a0e55d83f1aa274b76 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:20 +0530 Subject: [PATCH 13/24] thermal: cpu_cooling: create structure for idle time stats We keep two arrays for idle time stats and allocate memory for them separately. It would be much easier to follow if we create an array of idle stats structure instead and allocate it once. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 53 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 67ec52d5f7fc..11735dba1456 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -61,6 +61,16 @@ struct freq_table { u32 power; }; +/** + * struct time_in_idle - Idle time stats + * @time: previous reading of the absolute time that this cpu was idle + * @timestamp: wall time of the last invocation of get_cpu_idle_time_us() + */ +struct time_in_idle { + u64 time; + u64 timestamp; +}; + /** * struct cpufreq_cooling_device - data for cooling device with cpufreq * @id: unique integer value corresponding to each cpufreq_cooling_device @@ -76,9 +86,7 @@ struct freq_table { * cpufreq frequencies. * @node: list_head to link all cpufreq_cooling_device together. * @last_load: load measured by the latest call to cpufreq_get_requested_power() - * @time_in_idle: previous reading of the absolute time that this cpu was idle - * @time_in_idle_timestamp: wall time of the last invocation of - * get_cpu_idle_time_us() + * @idle_time: idle time stats * @cpu_dev: the cpu_device of policy->cpu. * @plat_get_static_power: callback to calculate the static power * @@ -95,8 +103,7 @@ struct cpufreq_cooling_device { struct freq_table *freq_table; /* In descending order */ struct list_head node; u32 last_load; - u64 *time_in_idle; - u64 *time_in_idle_timestamp; + struct time_in_idle *idle_time; struct device *cpu_dev; get_static_t plat_get_static_power; }; @@ -300,18 +307,19 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu, { u32 load; u64 now, now_idle, delta_time, delta_idle; + struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx]; now_idle = get_cpu_idle_time(cpu, &now, 0); - delta_idle = now_idle - cpufreq_cdev->time_in_idle[cpu_idx]; - delta_time = now - cpufreq_cdev->time_in_idle_timestamp[cpu_idx]; + delta_idle = now_idle - idle_time->time; + delta_time = now - idle_time->timestamp; if (delta_time <= delta_idle) load = 0; else load = div64_u64(100 * (delta_time - delta_idle), delta_time); - cpufreq_cdev->time_in_idle[cpu_idx] = now_idle; - cpufreq_cdev->time_in_idle_timestamp[cpu_idx] = now; + idle_time->time = now_idle; + idle_time->timestamp = now; return load; } @@ -715,22 +723,14 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_cdev->policy = policy; num_cpus = cpumask_weight(policy->related_cpus); - cpufreq_cdev->time_in_idle = kcalloc(num_cpus, - sizeof(*cpufreq_cdev->time_in_idle), - GFP_KERNEL); - if (!cpufreq_cdev->time_in_idle) { + cpufreq_cdev->idle_time = kcalloc(num_cpus, + sizeof(*cpufreq_cdev->idle_time), + GFP_KERNEL); + if (!cpufreq_cdev->idle_time) { cdev = ERR_PTR(-ENOMEM); goto free_cdev; } - cpufreq_cdev->time_in_idle_timestamp = - kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle_timestamp), - GFP_KERNEL); - if (!cpufreq_cdev->time_in_idle_timestamp) { - cdev = ERR_PTR(-ENOMEM); - goto free_time_in_idle; - } - /* max_level is an index, not a counter */ cpufreq_cdev->max_level = i - 1; @@ -738,7 +738,7 @@ __cpufreq_cooling_register(struct device_node *np, GFP_KERNEL); if (!cpufreq_cdev->freq_table) { cdev = ERR_PTR(-ENOMEM); - goto free_time_in_idle_timestamp; + goto free_idle_time; } ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL); @@ -801,10 +801,8 @@ remove_ida: ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); free_table: kfree(cpufreq_cdev->freq_table); -free_time_in_idle_timestamp: - kfree(cpufreq_cdev->time_in_idle_timestamp); -free_time_in_idle: - kfree(cpufreq_cdev->time_in_idle); +free_idle_time: + kfree(cpufreq_cdev->idle_time); free_cdev: kfree(cpufreq_cdev); return cdev; @@ -947,8 +945,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) thermal_cooling_device_unregister(cpufreq_cdev->cdev); ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); - kfree(cpufreq_cdev->time_in_idle_timestamp); - kfree(cpufreq_cdev->time_in_idle); + kfree(cpufreq_cdev->idle_time); kfree(cpufreq_cdev->freq_table); kfree(cpufreq_cdev); } From da27f69d6a4eaf7bdaaad9d9e1b997a1c8f186aa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:21 +0530 Subject: [PATCH 14/24] thermal: cpu_cooling: get_level() can't fail The frequency passed to get_level() is returned by cpu_power_to_freq() and it is guaranteed that get_level() can't fail. Get rid of error code. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 11735dba1456..2c2d76ac04c3 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -119,22 +119,19 @@ static LIST_HEAD(cpufreq_cdev_list); * @cpufreq_cdev: cpufreq_cdev for which the property is required * @freq: Frequency * - * Return: level on success, THERMAL_CSTATE_INVALID on error. + * Return: level corresponding to the frequency. */ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, unsigned int freq) { + struct freq_table *freq_table = cpufreq_cdev->freq_table; unsigned long level; - for (level = 0; level <= cpufreq_cdev->max_level; level++) { - if (freq == cpufreq_cdev->freq_table[level].frequency) - return level; - - if (freq > cpufreq_cdev->freq_table[level].frequency) + for (level = 1; level <= cpufreq_cdev->max_level; level++) + if (freq > freq_table[level].frequency) break; - } - return THERMAL_CSTATE_INVALID; + return level - 1; } /** @@ -627,13 +624,6 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power); *state = get_level(cpufreq_cdev, target_freq); - if (*state == THERMAL_CSTATE_INVALID) { - dev_err_ratelimited(&cdev->device, - "Failed to convert %dKHz for cpu %d into a cdev state\n", - target_freq, policy->cpu); - return -EINVAL; - } - trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state, power); return 0; From 53ca1ece60a719e23f686c647cb05d9e12a31974 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:22 +0530 Subject: [PATCH 15/24] thermal: cpu_cooling: don't store cpu_dev in cpufreq_cdev 'cpu_dev' is used by only one function, get_static_power(), and it wouldn't be time consuming to get the cpu device structure within it. This would help removing cpu_dev from struct cpufreq_cooling_device. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 2c2d76ac04c3..bfa9731e7cdb 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -87,7 +87,6 @@ struct time_in_idle { * @node: list_head to link all cpufreq_cooling_device together. * @last_load: load measured by the latest call to cpufreq_get_requested_power() * @idle_time: idle time stats - * @cpu_dev: the cpu_device of policy->cpu. * @plat_get_static_power: callback to calculate the static power * * This structure is required for keeping information of each registered @@ -104,7 +103,6 @@ struct cpufreq_cooling_device { struct list_head node; u32 last_load; struct time_in_idle *idle_time; - struct device *cpu_dev; get_static_t plat_get_static_power; }; @@ -259,8 +257,6 @@ static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev, freq_table[i].power = power; } - cpufreq_cdev->cpu_dev = dev; - return 0; } @@ -342,19 +338,22 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev, { struct dev_pm_opp *opp; unsigned long voltage; - struct cpumask *cpumask = cpufreq_cdev->policy->related_cpus; + struct cpufreq_policy *policy = cpufreq_cdev->policy; + struct cpumask *cpumask = policy->related_cpus; unsigned long freq_hz = freq * 1000; + struct device *dev; - if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) { + if (!cpufreq_cdev->plat_get_static_power) { *power = 0; return 0; } - opp = dev_pm_opp_find_freq_exact(cpufreq_cdev->cpu_dev, freq_hz, - true); + dev = get_cpu_device(policy->cpu); + WARN_ON(!dev); + + opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true); if (IS_ERR(opp)) { - dev_warn_ratelimited(cpufreq_cdev->cpu_dev, - "Failed to find OPP for frequency %lu: %ld\n", + dev_warn_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", freq_hz, PTR_ERR(opp)); return -EINVAL; } @@ -363,8 +362,7 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev, dev_pm_opp_put(opp); if (voltage == 0) { - dev_err_ratelimited(cpufreq_cdev->cpu_dev, - "Failed to get voltage for frequency %lu\n", + dev_err_ratelimited(dev, "Failed to get voltage for frequency %lu\n", freq_hz); return -EINVAL; } From cb1b631864ad7e34324ceccb73d0226dd00ad90d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:23 +0530 Subject: [PATCH 16/24] thermal: cpu_cooling: 'freq' can't be zero in cpufreq_state2power() The frequency table shouldn't have any zero frequency entries and so such a check isn't required. Though it would be better to make sure 'state' is within limits. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index bfa9731e7cdb..9cbf7bdbeb7a 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -564,12 +564,13 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, int ret; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; + /* Request state should be less than max_level */ + if (WARN_ON(state > cpufreq_cdev->max_level)) + return -EINVAL; + num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus); freq = cpufreq_cdev->freq_table[state].frequency; - if (!freq) - return -EINVAL; - dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus; ret = get_static_power(cpufreq_cdev, tz, freq, &static_power); if (ret) From d72b4015839cca333a8e0108b1739a50f03d2acc Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:24 +0530 Subject: [PATCH 17/24] thermal: cpu_cooling: Rearrange struct cpufreq_cooling_device This shrinks the size of the structure on arm64 by 8 bytes by avoiding padding of 4 bytes at two places. Also add missing doc comment for freq_table Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 9cbf7bdbeb7a..1305020790b2 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -75,17 +75,18 @@ struct time_in_idle { * struct cpufreq_cooling_device - data for cooling device with cpufreq * @id: unique integer value corresponding to each cpufreq_cooling_device * registered. - * @cdev: thermal_cooling_device pointer to keep track of the - * registered cooling device. - * @policy: cpufreq policy. + * @last_load: load measured by the latest call to cpufreq_get_requested_power() * @cpufreq_state: integer value representing the current state of cpufreq * cooling devices. * @clipped_freq: integer value representing the absolute value of the clipped * frequency. * @max_level: maximum cooling level. One less than total number of valid * cpufreq frequencies. + * @freq_table: Freq table in descending order of frequencies + * @cdev: thermal_cooling_device pointer to keep track of the + * registered cooling device. + * @policy: cpufreq policy. * @node: list_head to link all cpufreq_cooling_device together. - * @last_load: load measured by the latest call to cpufreq_get_requested_power() * @idle_time: idle time stats * @plat_get_static_power: callback to calculate the static power * @@ -94,14 +95,14 @@ struct time_in_idle { */ struct cpufreq_cooling_device { int id; - struct thermal_cooling_device *cdev; - struct cpufreq_policy *policy; + u32 last_load; unsigned int cpufreq_state; unsigned int clipped_freq; unsigned int max_level; struct freq_table *freq_table; /* In descending order */ + struct thermal_cooling_device *cdev; + struct cpufreq_policy *policy; struct list_head node; - u32 last_load; struct time_in_idle *idle_time; get_static_t plat_get_static_power; }; From f19b1a1735f7453c35031ed5ca3883f20176dde6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 May 2017 12:33:06 +0530 Subject: [PATCH 18/24] thermal: cpu_cooling: Replace kmalloc with kmalloc_array Checkpatch reports following: WARNING: Prefer kmalloc_array over kmalloc with multiply + cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i, Fix that. Signed-off-by: Viresh Kumar Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 1305020790b2..908a8014cf76 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -724,8 +724,9 @@ __cpufreq_cooling_register(struct device_node *np, /* max_level is an index, not a counter */ cpufreq_cdev->max_level = i - 1; - cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i, - GFP_KERNEL); + cpufreq_cdev->freq_table = kmalloc_array(i, + sizeof(*cpufreq_cdev->freq_table), + GFP_KERNEL); if (!cpufreq_cdev->freq_table) { cdev = ERR_PTR(-ENOMEM); goto free_idle_time; From 0d76d6e1eede5f2aa13695cb4c9d763bb3555e3e Mon Sep 17 00:00:00 2001 From: Willy WOLFF Date: Sat, 24 Jun 2017 14:06:03 +0100 Subject: [PATCH 19/24] thermal: fix source code documentation for parameters Some parameters are not documented, or not present at all, in thermal governors code. Signed-off-by: Willy Wolff Signed-off-by: Zhang Rui --- drivers/thermal/fair_share.c | 1 + drivers/thermal/step_wise.c | 3 +-- drivers/thermal/user_space.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c index 68bd1b569118..d3469fbc5207 100644 --- a/drivers/thermal/fair_share.c +++ b/drivers/thermal/fair_share.c @@ -71,6 +71,7 @@ static long get_target_state(struct thermal_zone_device *tz, /** * fair_share_throttle - throttles devices associated with the given zone * @tz - thermal_zone_device + * @trip - trip point index * * Throttling Logic: This uses three parameters to calculate the new * throttle state of the cooling devices associated with the given zone. diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index bcef2e7c4ec9..be95826631b7 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -186,8 +186,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) /** * step_wise_throttle - throttles devices associated with the given zone * @tz - thermal_zone_device - * @trip - the trip point - * @trip_type - type of the trip point + * @trip - trip point index * * Throttling Logic: This uses the trend of the thermal zone to throttle. * If the thermal zone is 'heating up' this throttles all the cooling diff --git a/drivers/thermal/user_space.c b/drivers/thermal/user_space.c index c908150c268d..8e92a06ef48a 100644 --- a/drivers/thermal/user_space.c +++ b/drivers/thermal/user_space.c @@ -24,12 +24,13 @@ #include #include + #include "thermal_core.h" /** * notify_user_space - Notifies user space about thermal events * @tz - thermal_zone_device - * @trip - Trip point index + * @trip - trip point index * * This function notifies the user space through UEvents. */ From 86326031e3d4dec828d966c19fe9fe1371d0a0a8 Mon Sep 17 00:00:00 2001 From: Sumeet Pawnikar Date: Tue, 30 May 2017 23:14:58 +0530 Subject: [PATCH 20/24] Thermal/int340x: Fix few typos and kernel-doc style This patch fix the few typos in function header of acpi_parse_trt. Also, fix the typo in kernel debug message for acpi_parse_art. Signed-off-by: Sumeet Pawnikar Signed-off-by: Zhang Rui --- drivers/thermal/int340x_thermal/acpi_thermal_rel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c index 2c2ec7666eb1..51ceb80212a7 100644 --- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c @@ -62,8 +62,8 @@ static int acpi_thermal_rel_release(struct inode *inode, struct file *file) * acpi_parse_trt - Thermal Relationship Table _TRT for passive cooling * * @handle: ACPI handle of the device contains _TRT - * @art_count: the number of valid entries resulted from parsing _TRT - * @artp: pointer to pointer of array of art entries in parsing result + * @trt_count: the number of valid entries resulted from parsing _TRT + * @trtp: pointer to pointer of array of _TRT entries in parsing result * @create_dev: whether to create platform devices for target and source * */ @@ -208,7 +208,7 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, if (art->target) { result = acpi_bus_get_device(art->target, &adev); if (result) - pr_warn("Failed to get source ACPI device\n"); + pr_warn("Failed to get target ACPI device\n"); } } From 4ca0e75e46b6ee408bdc334fff449323d476812c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 6 Jun 2017 16:00:41 -0700 Subject: [PATCH 21/24] thermal: int340x: check for sensor when PTYP is missing For INT3403 sensor PTYP field is mandatory. But some platforms didn't have this field for sensors. This cause load failure for int3403 driver. This change checks for the presence of _TMP method and if present, then treats this device as a sensor. Signed-off-by: Srinivas Pandruvada Signed-off-by: Zhang Rui --- drivers/thermal/int340x_thermal/int3403_thermal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c index c4890c9437eb..8a7f24dd9315 100644 --- a/drivers/thermal/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/int340x_thermal/int3403_thermal.c @@ -238,8 +238,16 @@ static int int3403_add(struct platform_device *pdev) status = acpi_evaluate_integer(priv->adev->handle, "PTYP", NULL, &priv->type); if (ACPI_FAILURE(status)) { - result = -EINVAL; - goto err; + unsigned long long tmp; + + status = acpi_evaluate_integer(priv->adev->handle, "_TMP", + NULL, &tmp); + if (ACPI_FAILURE(status)) { + result = -EINVAL; + goto err; + } else { + priv->type = INT3403_TYPE_SENSOR; + } } platform_set_drvdata(pdev, priv); From e3bdc8d7623d5875403ad40443e7b049ae200fcd Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 6 Jun 2017 15:12:37 +0530 Subject: [PATCH 22/24] thermal: imx: Handle return value of clk_prepare_enable clk_prepare_enable() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Eduardo Valentin --- drivers/thermal/imx_thermal.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index f7ec39f46ee4..4798b4b1fd77 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -660,8 +660,11 @@ static int imx_thermal_resume(struct device *dev) { struct imx_thermal_data *data = dev_get_drvdata(dev); struct regmap *map = data->tempmon; + int ret; - clk_prepare_enable(data->thermal_clk); + ret = clk_prepare_enable(data->thermal_clk); + if (ret) + return ret; /* Enabled thermal sensor after resume */ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); From 919054fdfc8adf58c5512fe9872eb53ea0f5525d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 6 Jun 2017 15:04:46 +0530 Subject: [PATCH 23/24] thermal: hisilicon: Handle return value of clk_prepare_enable clk_prepare_enable() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Eduardo Valentin --- drivers/thermal/hisi_thermal.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index f6429666a1cf..9c3ce341eb97 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -397,8 +397,11 @@ static int hisi_thermal_suspend(struct device *dev) static int hisi_thermal_resume(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); + int ret; - clk_prepare_enable(data->clk); + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; data->irq_enabled = true; hisi_thermal_enable_bind_irq_sensor(data); From 1fe3854a83b580727c9464b37b62ba77ead1d6f6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 12:13:27 +0300 Subject: [PATCH 24/24] thermal: bcm2835: fix an error code in probe() This causes a static checker because we're passing a valid pointer to PTR_ERR(). "err" is already the correct error code, so we can just delete this line. Fixes: bcb7dd9ef206 ("thermal: bcm2835: add thermal driver for bcm2835 SoC") Acked-by: Stefan Wahren Signed-off-by: Dan Carpenter Signed-off-by: Eduardo Valentin --- drivers/thermal/broadcom/bcm2835_thermal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c index 0ecf80890c84..e6863c841662 100644 --- a/drivers/thermal/broadcom/bcm2835_thermal.c +++ b/drivers/thermal/broadcom/bcm2835_thermal.c @@ -245,7 +245,6 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) */ err = tz->ops->get_trip_temp(tz, 0, &trip_temp); if (err < 0) { - err = PTR_ERR(tz); dev_err(&pdev->dev, "Not able to read trip_temp: %d\n", err);