From 00357f5ec5d67a52a175da6f29f85c2c19d59bc8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Dec 2017 15:06:50 +0100 Subject: [PATCH] sched/nohz: Clean up nohz enter/exit The primary observation is that nohz enter/exit is always from the current CPU, therefore NOHZ_TICK_STOPPED does not in fact need to be an atomic. Secondary is that we appear to have 2 nearly identical hooks in the nohz enter code, set_cpu_sd_state_idle() and nohz_balance_enter_idle(). Fold the whole set_cpu_sd_state thing into nohz_balance_{enter,exit}_idle. Removes an atomic op from both enter and exit paths. Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 2 - kernel/sched/core.c | 2 +- kernel/sched/fair.c | 83 +++++++++++++++++++------------------- kernel/sched/sched.h | 11 +++-- kernel/time/tick-sched.c | 7 ---- 5 files changed, 48 insertions(+), 57 deletions(-) diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 094217273ff9..b36f4cf38111 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -16,11 +16,9 @@ static inline void cpu_load_update_nohz_stop(void) { } #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } #endif #ifdef CONFIG_NO_HZ_COMMON diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8a10a2ce30a4..c7faeb7bd03a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5861,7 +5861,7 @@ int sched_cpu_dying(unsigned int cpu) calc_load_migrate(rq); update_max_interval(); - nohz_balance_exit_idle(cpu); + nohz_balance_exit_idle(rq); hrtick_clear(rq); return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 85232dad89c9..494d5db9a6cd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9103,23 +9103,6 @@ static inline int find_new_ilb(void) return nr_cpu_ids; } -static inline void set_cpu_sd_state_busy(void) -{ - struct sched_domain *sd; - int cpu = smp_processor_id(); - - rcu_read_lock(); - sd = rcu_dereference(per_cpu(sd_llc, cpu)); - - if (!sd || !sd->nohz_idle) - goto unlock; - sd->nohz_idle = 0; - - atomic_inc(&sd->shared->nr_busy_cpus); -unlock: - rcu_read_unlock(); -} - /* * Kick a CPU to do the nohz balancing, if it is time for it. We pick the * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle @@ -9175,8 +9158,7 @@ static void nohz_balancer_kick(struct rq *rq) * We may be recently in ticked or tickless idle mode. At the first * busy tick after returning from idle, we will update the busy stats. */ - set_cpu_sd_state_busy(); - nohz_balance_exit_idle(cpu); + nohz_balance_exit_idle(rq); /* * None are in tickless mode and hence no need for NOHZ idle load @@ -9240,27 +9222,39 @@ out: kick_ilb(flags); } -void nohz_balance_exit_idle(unsigned int cpu) -{ - unsigned int flags = atomic_read(nohz_flags(cpu)); - - if (unlikely(flags & NOHZ_TICK_STOPPED)) { - /* - * Completely isolated CPUs don't ever set, so we must test. - */ - if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) { - cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); - atomic_dec(&nohz.nr_cpus); - } - - atomic_andnot(NOHZ_TICK_STOPPED, nohz_flags(cpu)); - } -} - -void set_cpu_sd_state_idle(void) +static void set_cpu_sd_state_busy(int cpu) +{ + struct sched_domain *sd; + + rcu_read_lock(); + sd = rcu_dereference(per_cpu(sd_llc, cpu)); + + if (!sd || !sd->nohz_idle) + goto unlock; + sd->nohz_idle = 0; + + atomic_inc(&sd->shared->nr_busy_cpus); +unlock: + rcu_read_unlock(); +} + +void nohz_balance_exit_idle(struct rq *rq) +{ + SCHED_WARN_ON(rq != this_rq()); + + if (likely(!rq->nohz_tick_stopped)) + return; + + rq->nohz_tick_stopped = 0; + cpumask_clear_cpu(rq->cpu, nohz.idle_cpus_mask); + atomic_dec(&nohz.nr_cpus); + + set_cpu_sd_state_busy(rq->cpu); +} + +static void set_cpu_sd_state_idle(int cpu) { struct sched_domain *sd; - int cpu = smp_processor_id(); rcu_read_lock(); sd = rcu_dereference(per_cpu(sd_llc, cpu)); @@ -9280,6 +9274,10 @@ unlock: */ void nohz_balance_enter_idle(int cpu) { + struct rq *rq = cpu_rq(cpu); + + SCHED_WARN_ON(cpu != smp_processor_id()); + /* If this CPU is going down, then nothing needs to be done: */ if (!cpu_active(cpu)) return; @@ -9288,16 +9286,19 @@ void nohz_balance_enter_idle(int cpu) if (!housekeeping_cpu(cpu, HK_FLAG_SCHED)) return; - if (atomic_read(nohz_flags(cpu)) & NOHZ_TICK_STOPPED) + if (rq->nohz_tick_stopped) return; /* If we're a completely isolated CPU, we don't play: */ - if (on_null_domain(cpu_rq(cpu))) + if (on_null_domain(rq)) return; + rq->nohz_tick_stopped = 1; + cpumask_set_cpu(cpu, nohz.idle_cpus_mask); atomic_inc(&nohz.nr_cpus); - atomic_or(NOHZ_TICK_STOPPED, nohz_flags(cpu)); + + set_cpu_sd_state_idle(cpu); } #else static inline void nohz_balancer_kick(struct rq *rq) { } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 21381d276709..818f22dbc7ea 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -764,6 +764,7 @@ struct rq { unsigned long last_load_update_tick; unsigned long last_blocked_load_update_tick; #endif /* CONFIG_SMP */ + unsigned int nohz_tick_stopped; atomic_t nohz_flags; #endif /* CONFIG_NO_HZ_COMMON */ @@ -2035,11 +2036,9 @@ extern void cfs_bandwidth_usage_inc(void); extern void cfs_bandwidth_usage_dec(void); #ifdef CONFIG_NO_HZ_COMMON -#define NOHZ_TICK_STOPPED_BIT 0 -#define NOHZ_BALANCE_KICK_BIT 1 -#define NOHZ_STATS_KICK_BIT 2 +#define NOHZ_BALANCE_KICK_BIT 0 +#define NOHZ_STATS_KICK_BIT 1 -#define NOHZ_TICK_STOPPED BIT(NOHZ_TICK_STOPPED_BIT) #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) @@ -2047,9 +2046,9 @@ extern void cfs_bandwidth_usage_dec(void); #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) -extern void nohz_balance_exit_idle(unsigned int cpu); +extern void nohz_balance_exit_idle(struct rq *rq); #else -static inline void nohz_balance_exit_idle(unsigned int cpu) { } +static inline void nohz_balance_exit_idle(struct rq *rq) { } #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f2fa2e940fe5..ab92aa4442df 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -954,13 +954,6 @@ void tick_nohz_idle_enter(void) struct tick_sched *ts; lockdep_assert_irqs_enabled(); - /* - * Update the idle state in the scheduler domain hierarchy - * when tick_nohz_stop_sched_tick() is called from the idle loop. - * State will be updated to busy during the first busy tick after - * exiting idle. - */ - set_cpu_sd_state_idle(); local_irq_disable();