From 4ba2968420fa9d0604b6a6a5c61bfa8d0fa84ae0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 26 Aug 2014 19:12:21 -0500 Subject: [PATCH] percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t __get_cpu_var can paper over differences in the definitions of cpumask_var_t and either use the address of the cpumask variable directly or perform a fetch of the address of the struct cpumask allocated elsewhere. This is important particularly when using per cpu cpumask_var_t declarations because in one case we have an offset into a per cpu area to handle and in the other case we need to fetch a pointer from the offset. This patch introduces a new macro this_cpu_cpumask_var_ptr() that is defined where cpumask_var_t is defined and performs the proper actions. All use cases where __get_cpu_var is used with cpumask_var_t are converted to the use of this_cpu_cpumask_var_ptr(). Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/perf_event_p4.h | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 3 +-- arch/x86/oprofile/op_model_p4.c | 2 +- include/linux/cpumask.h | 11 +++++++++++ kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 2 +- 7 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 85e13ccf15c4..d725382c2ae0 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP if (smp_num_siblings == 2) - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6ce600f9bc78..1f5d5f2ffae6 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -42,8 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * We are to modify mask, so we need an own copy * and be sure it's manipulated with irq off. */ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); /* * The idea is to send one IPI per cluster. diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 98ab13058f89..ad1d91f475ab 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -372,7 +372,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2997af6d2ccd..0a9a6da21e74 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -666,10 +666,19 @@ static inline size_t cpumask_size(void) * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differently influences what this_cpu_* operation needs to be + * used. Please use this_cpu_cpumask_var_t in those cases. The direct use + * of this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) + bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); @@ -681,6 +690,8 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; +#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) + static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 255ce138b652..4a608cfaecbd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1158,7 +1158,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); static int find_later_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); + struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl); int this_cpu = smp_processor_id(); int best_cpu, cpu = task_cpu(task); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfa3c86d0d68..197d659c144c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6539,7 +6539,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; struct rq *busiest; unsigned long flags; - struct cpumask *cpus = __get_cpu_var(load_balance_mask); + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { .sd = sd, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5f6edca4fafd..a4c50fce9b90 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1526,7 +1526,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task);