From 5fd7a09cfb8c6852f596c1f8c891c6158395250e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2015 18:03:23 +0200 Subject: [PATCH 1/9] atomic: Export fetch_or() Export fetch_or() that's implemented and used internally by the scheduler. We are going to use it for NO_HZ so make it generally available. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/atomic.h | 21 +++++++++++++++++++++ kernel/sched/core.c | 14 -------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 301de78d65f7..6c502cb13c95 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +/** + * fetch_or - perform *ptr |= mask and return old value of *ptr + * @ptr: pointer to value + * @mask: mask to OR on the value + * + * cmpxchg based fetch_or, macro so it works for different integer types + */ +#ifndef fetch_or +#define fetch_or(ptr, mask) \ +({ typeof(*(ptr)) __old, __val = *(ptr); \ + for (;;) { \ + __old = cmpxchg((ptr), __val, __val | (mask)); \ + if (__old == __val) \ + break; \ + __val = __old; \ + } \ + __old; \ +}) +#endif + + #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9503d590e5ef..7142feb4b92d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -453,20 +453,6 @@ static inline void init_hrtick(void) } #endif /* CONFIG_SCHED_HRTICK */ -/* - * cmpxchg based fetch_or, macro so it works for different integer types - */ -#define fetch_or(ptr, val) \ -({ typeof(*(ptr)) __old, __val = *(ptr); \ - for (;;) { \ - __old = cmpxchg((ptr), __val, __val | (val)); \ - if (__old == __val) \ - break; \ - __val = __old; \ - } \ - __old; \ -}) - #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) /* * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, From 8537bb95a63e4be330359721f0ecd422f4a4c0ca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Dec 2015 16:55:23 +0100 Subject: [PATCH 2/9] nohz: Implement wide kick on top of irq work It simplifies it and allows wide kick to be performed, even when IRQs are disabled, without an asynchronous level in the middle. This comes at a cost of some more overhead on features like perf and posix cpu timers slow-paths, which is probably not much important for nohz full users. Requested-by: Peter Zijlstra Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/time/tick-sched.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0b17424349eb..548a4e2551a9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -234,24 +234,20 @@ void tick_nohz_full_kick_cpu(int cpu) irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); } -static void nohz_full_kick_ipi(void *info) -{ - /* Empty, the tick restart happens on tick_nohz_irq_exit() */ -} - /* * Kick all full dynticks CPUs in order to force these to re-evaluate * their dependency on the tick and restart it if necessary. */ void tick_nohz_full_kick_all(void) { + int cpu; + if (!tick_nohz_full_running) return; preempt_disable(); - smp_call_function_many(tick_nohz_full_mask, - nohz_full_kick_ipi, NULL, false); - tick_nohz_full_kick(); + for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask) + tick_nohz_full_kick_cpu(cpu); preempt_enable(); } From d027d45d8a17a4145eab2d841140e9acbb7feb59 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 7 Jun 2015 15:54:30 +0200 Subject: [PATCH 3/9] nohz: New tick dependency mask The tick dependency is evaluated on every IRQ and context switch. This consists is a batch of checks which determine whether it is safe to stop the tick or not. These checks are often split in many details: posix cpu timers, scheduler, sched clock, perf events.... each of which are made of smaller details: posix cpu timer involves checking process wide timers then thread wide timers. Perf involves checking freq events then more per cpu details. Checking these informations asynchronously every time we update the full dynticks state bring avoidable overhead and a messy layout. Let's introduce instead tick dependency masks: one for system wide dependency (unstable sched clock, freq based perf events), one for CPU wide dependency (sched, throttling perf events), and task/signal level dependencies (posix cpu timers). The subsystems are responsible for setting and clearing their dependency through a set of APIs that will take care of concurrent dependency mask modifications and kick targets to restart the relevant CPU tick whenever needed. This new dependency engine stays beside the old one until all subsystems having a tick dependency are converted to it. Suggested-by: Thomas Gleixner Suggested-by: Peter Zijlstra Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/sched.h | 8 +++ include/linux/tick.h | 92 ++++++++++++++++++++++++ kernel/time/tick-sched.c | 150 +++++++++++++++++++++++++++++++++++++-- kernel/time/tick-sched.h | 1 + 4 files changed, 244 insertions(+), 7 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..307e48375f21 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -719,6 +719,10 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; +#ifdef CONFIG_NO_HZ_FULL + unsigned long tick_dep_mask; +#endif + struct list_head cpu_timers[3]; struct pid *tty_old_pgrp; @@ -1542,6 +1546,10 @@ struct task_struct { VTIME_SYS, } vtime_snap_whence; #endif + +#ifdef CONFIG_NO_HZ_FULL + unsigned long tick_dep_mask; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ u64 real_start_time; /* boot based time in nsec */ diff --git a/include/linux/tick.h b/include/linux/tick.h index 97fd4e543846..d60e5df8ec28 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -97,6 +97,18 @@ static inline void tick_broadcast_exit(void) tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); } +enum tick_dep_bits { + TICK_DEP_BIT_POSIX_TIMER = 0, + TICK_DEP_BIT_PERF_EVENTS = 1, + TICK_DEP_BIT_SCHED = 2, + TICK_DEP_BIT_CLOCK_UNSTABLE = 3 +}; + +#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER) +#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS) +#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) +#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_enabled; extern int tick_nohz_tick_stopped(void); @@ -154,6 +166,72 @@ static inline int housekeeping_any_cpu(void) return cpumask_any_and(housekeeping_mask, cpu_online_mask); } +extern void tick_nohz_dep_set(enum tick_dep_bits bit); +extern void tick_nohz_dep_clear(enum tick_dep_bits bit); +extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit); +extern void tick_nohz_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit); +extern void tick_nohz_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit); +extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit); + +/* + * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases + * on top of static keys. + */ +static inline void tick_dep_set(enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set(bit); +} + +static inline void tick_dep_clear(enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear(bit); +} + +static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) +{ + if (tick_nohz_full_cpu(cpu)) + tick_nohz_dep_set_cpu(cpu, bit); +} + +static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) +{ + if (tick_nohz_full_cpu(cpu)) + tick_nohz_dep_clear_cpu(cpu, bit); +} + +static inline void tick_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set_task(tsk, bit); +} +static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear_task(tsk, bit); +} +static inline void tick_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_set_signal(signal, bit); +} +static inline void tick_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit) +{ + if (tick_nohz_full_enabled()) + tick_nohz_dep_clear_signal(signal, bit); +} + extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); @@ -166,6 +244,20 @@ static inline int housekeeping_any_cpu(void) static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } + +static inline void tick_dep_set(enum tick_dep_bits bit) { } +static inline void tick_dep_clear(enum tick_dep_bits bit) { } +static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_dep_set_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } +static inline void tick_dep_clear_task(struct task_struct *tsk, + enum tick_dep_bits bit) { } +static inline void tick_dep_set_signal(struct signal_struct *signal, + enum tick_dep_bits bit) { } +static inline void tick_dep_clear_signal(struct signal_struct *signal, + enum tick_dep_bits bit) { } + static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 548a4e2551a9..74ab7dbdc023 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -158,11 +158,53 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) cpumask_var_t tick_nohz_full_mask; cpumask_var_t housekeeping_mask; bool tick_nohz_full_running; +static unsigned long tick_dep_mask; -static bool can_stop_full_tick(void) +static void trace_tick_dependency(unsigned long dep) +{ + if (dep & TICK_DEP_MASK_POSIX_TIMER) { + trace_tick_stop(0, "posix timers running\n"); + return; + } + + if (dep & TICK_DEP_MASK_PERF_EVENTS) { + trace_tick_stop(0, "perf events running\n"); + return; + } + + if (dep & TICK_DEP_MASK_SCHED) { + trace_tick_stop(0, "more than 1 task in runqueue\n"); + return; + } + + if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE) + trace_tick_stop(0, "unstable sched clock\n"); +} + +static bool can_stop_full_tick(struct tick_sched *ts) { WARN_ON_ONCE(!irqs_disabled()); + if (tick_dep_mask) { + trace_tick_dependency(tick_dep_mask); + return false; + } + + if (ts->tick_dep_mask) { + trace_tick_dependency(ts->tick_dep_mask); + return false; + } + + if (current->tick_dep_mask) { + trace_tick_dependency(current->tick_dep_mask); + return false; + } + + if (current->signal->tick_dep_mask) { + trace_tick_dependency(current->signal->tick_dep_mask); + return false; + } + if (!sched_can_stop_tick()) { trace_tick_stop(0, "more than 1 task in runqueue\n"); return false; @@ -178,9 +220,10 @@ static bool can_stop_full_tick(void) return false; } - /* sched_clock_tick() needs us? */ #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK /* + * sched_clock_tick() needs us? + * * TODO: kick full dynticks CPUs when * sched_clock_stable is set. */ @@ -199,13 +242,13 @@ static bool can_stop_full_tick(void) return true; } -static void nohz_full_kick_work_func(struct irq_work *work) +static void nohz_full_kick_func(struct irq_work *work) { /* Empty, the tick restart happens on tick_nohz_irq_exit() */ } static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { - .func = nohz_full_kick_work_func, + .func = nohz_full_kick_func, }; /* @@ -251,6 +294,95 @@ void tick_nohz_full_kick_all(void) preempt_enable(); } +static void tick_nohz_dep_set_all(unsigned long *dep, + enum tick_dep_bits bit) +{ + unsigned long prev; + + prev = fetch_or(dep, BIT_MASK(bit)); + if (!prev) + tick_nohz_full_kick_all(); +} + +/* + * Set a global tick dependency. Used by perf events that rely on freq and + * by unstable clock. + */ +void tick_nohz_dep_set(enum tick_dep_bits bit) +{ + tick_nohz_dep_set_all(&tick_dep_mask, bit); +} + +void tick_nohz_dep_clear(enum tick_dep_bits bit) +{ + clear_bit(bit, &tick_dep_mask); +} + +/* + * Set per-CPU tick dependency. Used by scheduler and perf events in order to + * manage events throttling. + */ +void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) +{ + unsigned long prev; + struct tick_sched *ts; + + ts = per_cpu_ptr(&tick_cpu_sched, cpu); + + prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit)); + if (!prev) { + preempt_disable(); + /* Perf needs local kick that is NMI safe */ + if (cpu == smp_processor_id()) { + tick_nohz_full_kick(); + } else { + /* Remote irq work not NMI-safe */ + if (!WARN_ON_ONCE(in_nmi())) + tick_nohz_full_kick_cpu(cpu); + } + preempt_enable(); + } +} + +void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) +{ + struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); + + clear_bit(bit, &ts->tick_dep_mask); +} + +/* + * Set a per-task tick dependency. Posix CPU timers need this in order to elapse + * per task timers. + */ +void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) +{ + /* + * We could optimize this with just kicking the target running the task + * if that noise matters for nohz full users. + */ + tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit); +} + +void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) +{ + clear_bit(bit, &tsk->tick_dep_mask); +} + +/* + * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse + * per process timers. + */ +void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit) +{ + tick_nohz_dep_set_all(&sig->tick_dep_mask, bit); +} + +void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit) +{ + clear_bit(bit, &sig->tick_dep_mask); +} + /* * Re-evaluate the need for the tick as we switch the current task. * It might need the tick due to per task/process properties: @@ -259,15 +391,19 @@ void tick_nohz_full_kick_all(void) void __tick_nohz_task_switch(void) { unsigned long flags; + struct tick_sched *ts; local_irq_save(flags); if (!tick_nohz_full_cpu(smp_processor_id())) goto out; - if (tick_nohz_tick_stopped() && !can_stop_full_tick()) - tick_nohz_full_kick(); + ts = this_cpu_ptr(&tick_cpu_sched); + if (ts->tick_stopped) { + if (current->tick_dep_mask || current->signal->tick_dep_mask) + tick_nohz_full_kick(); + } out: local_irq_restore(flags); } @@ -736,7 +872,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) return; - if (can_stop_full_tick()) + if (can_stop_full_tick(ts)) tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); else if (ts->tick_stopped) tick_nohz_restart_sched_tick(ts, ktime_get(), 1); diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index a4a8d4e9baa1..eb4e32566a83 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -60,6 +60,7 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; int do_timer_last; + unsigned long tick_dep_mask; }; extern struct tick_sched *tick_get_tick_sched(int cpu); From e6e6cc22e067a6f44449aa8fd0328404079c3ba5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Dec 2015 03:27:25 +0100 Subject: [PATCH 4/9] nohz: Use enum code for tick stop failure tracing message It makes nohz tracing more lightweight, standard and easier to parse. Examples: user_loop-2904 [007] d..1 517.701126: tick_stop: success=1 dependency=NONE user_loop-2904 [007] dn.1 518.021181: tick_stop: success=0 dependency=SCHED posix_timers-6142 [007] d..1 1739.027400: tick_stop: success=0 dependency=POSIX_TIMER user_loop-5463 [007] dN.1 1185.931939: tick_stop: success=0 dependency=PERF_EVENTS Suggested-by: Peter Zijlstra Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 1 + include/trace/events/timer.h | 36 +++++++++++++++++++++++++++++++----- kernel/time/tick-sched.c | 18 +++++++++--------- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index d60e5df8ec28..96d276a923bf 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -104,6 +104,7 @@ enum tick_dep_bits { TICK_DEP_BIT_CLOCK_UNSTABLE = 3 }; +#define TICK_DEP_MASK_NONE 0 #define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER) #define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS) #define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 073b9ac245ba..51440131d337 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire, ); #ifdef CONFIG_NO_HZ_COMMON + +#define TICK_DEP_NAMES \ + tick_dep_name(NONE) \ + tick_dep_name(POSIX_TIMER) \ + tick_dep_name(PERF_EVENTS) \ + tick_dep_name(SCHED) \ + tick_dep_name_end(CLOCK_UNSTABLE) + +#undef tick_dep_name +#undef tick_dep_name_end + +#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); +#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); + +TICK_DEP_NAMES + +#undef tick_dep_name +#undef tick_dep_name_end + +#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep }, +#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep } + +#define show_tick_dep_name(val) \ + __print_symbolic(val, TICK_DEP_NAMES) + TRACE_EVENT(tick_stop, - TP_PROTO(int success, char *error_msg), + TP_PROTO(int success, int dependency), - TP_ARGS(success, error_msg), + TP_ARGS(success, dependency), TP_STRUCT__entry( __field( int , success ) - __string( msg, error_msg ) + __field( int , dependency ) ), TP_fast_assign( __entry->success = success; - __assign_str(msg, error_msg); + __entry->dependency = dependency; ), - TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg)) + TP_printk("success=%d dependency=%s", __entry->success, \ + show_tick_dep_name(__entry->dependency)) ); #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 74ab7dbdc023..47e5ac45ce69 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -163,22 +163,22 @@ static unsigned long tick_dep_mask; static void trace_tick_dependency(unsigned long dep) { if (dep & TICK_DEP_MASK_POSIX_TIMER) { - trace_tick_stop(0, "posix timers running\n"); + trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); return; } if (dep & TICK_DEP_MASK_PERF_EVENTS) { - trace_tick_stop(0, "perf events running\n"); + trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); return; } if (dep & TICK_DEP_MASK_SCHED) { - trace_tick_stop(0, "more than 1 task in runqueue\n"); + trace_tick_stop(0, TICK_DEP_MASK_SCHED); return; } if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE) - trace_tick_stop(0, "unstable sched clock\n"); + trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); } static bool can_stop_full_tick(struct tick_sched *ts) @@ -206,17 +206,17 @@ static bool can_stop_full_tick(struct tick_sched *ts) } if (!sched_can_stop_tick()) { - trace_tick_stop(0, "more than 1 task in runqueue\n"); + trace_tick_stop(0, TICK_DEP_MASK_SCHED); return false; } if (!posix_cpu_timers_can_stop_tick(current)) { - trace_tick_stop(0, "posix timers running\n"); + trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); return false; } if (!perf_event_can_stop_tick()) { - trace_tick_stop(0, "perf events running\n"); + trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); return false; } @@ -228,7 +228,7 @@ static bool can_stop_full_tick(struct tick_sched *ts) * sched_clock_stable is set. */ if (!sched_clock_stable()) { - trace_tick_stop(0, "unstable sched clock\n"); + trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); /* * Don't allow the user to think they can get * full NO_HZ with this machine. @@ -821,7 +821,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; - trace_tick_stop(1, " "); + trace_tick_stop(1, TICK_DEP_MASK_NONE); } /* From 555e0c1ef7ff49ee5ac3a1eb12de4a2e4722f63d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2015 17:42:29 +0200 Subject: [PATCH 5/9] perf: Migrate perf to use new tick dependency mask model Instead of providing asynchronous checks for the nohz subsystem to verify perf event tick dependency, migrate perf to the new mask. Perf needs the tick for two situations: 1) Freq events. We could set the tick dependency when those are installed on a CPU context. But setting a global dependency on top of the global freq events accounting is much easier. If people want that to be optimized, we can still refine that on the per-CPU tick dependency level. This patch dooesn't change the current behaviour anyway. 2) Throttled events: this is a per-cpu dependency. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/perf_event.h | 6 ---- include/linux/tick.h | 2 -- kernel/events/core.c | 65 ++++++++++++++++++++++++++++---------- kernel/time/tick-sched.c | 8 +---- 4 files changed, 49 insertions(+), 32 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b35a61a481fa..d3ff88c13632 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1108,12 +1108,6 @@ static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } #endif -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) -extern bool perf_event_can_stop_tick(void); -#else -static inline bool perf_event_can_stop_tick(void) { return true; } -#endif - #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); #else diff --git a/include/linux/tick.h b/include/linux/tick.h index 96d276a923bf..0e63db4bc662 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -233,7 +233,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, tick_nohz_dep_clear_signal(signal, bit); } -extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); @@ -260,7 +259,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) { } static inline void tick_nohz_full_kick_cpu(int cpu) { } -static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } static inline void __tick_nohz_task_switch(void) { } #endif diff --git a/kernel/events/core.c b/kernel/events/core.c index 5946460b2425..f23d480052e3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3060,17 +3060,6 @@ done: return rotate; } -#ifdef CONFIG_NO_HZ_FULL -bool perf_event_can_stop_tick(void) -{ - if (atomic_read(&nr_freq_events) || - __this_cpu_read(perf_throttled_count)) - return false; - else - return true; -} -#endif - void perf_event_task_tick(void) { struct list_head *head = this_cpu_ptr(&active_ctx_list); @@ -3081,6 +3070,7 @@ void perf_event_task_tick(void) __this_cpu_inc(perf_throttled_seq); throttled = __this_cpu_xchg(perf_throttled_count, 0); + tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); list_for_each_entry_safe(ctx, tmp, head, active_ctx_list) perf_adjust_freq_unthr_context(ctx, throttled); @@ -3511,6 +3501,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu) atomic_dec(&per_cpu(perf_cgroup_events, cpu)); } +#ifdef CONFIG_NO_HZ_FULL +static DEFINE_SPINLOCK(nr_freq_lock); +#endif + +static void unaccount_freq_event_nohz(void) +{ +#ifdef CONFIG_NO_HZ_FULL + spin_lock(&nr_freq_lock); + if (atomic_dec_and_test(&nr_freq_events)) + tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS); + spin_unlock(&nr_freq_lock); +#endif +} + +static void unaccount_freq_event(void) +{ + if (tick_nohz_full_enabled()) + unaccount_freq_event_nohz(); + else + atomic_dec(&nr_freq_events); +} + static void unaccount_event(struct perf_event *event) { bool dec = false; @@ -3527,7 +3539,7 @@ static void unaccount_event(struct perf_event *event) if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) - atomic_dec(&nr_freq_events); + unaccount_freq_event(); if (event->attr.context_switch) { dec = true; atomic_dec(&nr_switch_events); @@ -6349,9 +6361,9 @@ static int __perf_event_overflow(struct perf_event *event, if (unlikely(throttle && hwc->interrupts >= max_samples_per_tick)) { __this_cpu_inc(perf_throttled_count); + tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(event, 0); - tick_nohz_full_kick(); ret = 1; } } @@ -7741,6 +7753,27 @@ static void account_event_cpu(struct perf_event *event, int cpu) atomic_inc(&per_cpu(perf_cgroup_events, cpu)); } +/* Freq events need the tick to stay alive (see perf_event_task_tick). */ +static void account_freq_event_nohz(void) +{ +#ifdef CONFIG_NO_HZ_FULL + /* Lock so we don't race with concurrent unaccount */ + spin_lock(&nr_freq_lock); + if (atomic_inc_return(&nr_freq_events) == 1) + tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS); + spin_unlock(&nr_freq_lock); +#endif +} + +static void account_freq_event(void) +{ + if (tick_nohz_full_enabled()) + account_freq_event_nohz(); + else + atomic_inc(&nr_freq_events); +} + + static void account_event(struct perf_event *event) { bool inc = false; @@ -7756,10 +7789,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); - if (event->attr.freq) { - if (atomic_inc_return(&nr_freq_events) == 1) - tick_nohz_full_kick_all(); - } + if (event->attr.freq) + account_freq_event(); if (event->attr.context_switch) { atomic_inc(&nr_switch_events); inc = true; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 47e5ac45ce69..3f79def37ca9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -215,11 +214,6 @@ static bool can_stop_full_tick(struct tick_sched *ts) return false; } - if (!perf_event_can_stop_tick()) { - trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); - return false; - } - #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK /* * sched_clock_tick() needs us? @@ -257,7 +251,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(), * is NMI safe. */ -void tick_nohz_full_kick(void) +static void tick_nohz_full_kick(void) { if (!tick_nohz_full_cpu(smp_processor_id())) return; From 01d36d0ac390895e719d0dd8ab91ebbbf506d28e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Nov 2015 18:17:10 +0100 Subject: [PATCH 6/9] sched: Account rr tasks In order to evaluate the scheduler tick dependency without probing context switches, we need to know how much SCHED_RR and SCHED_FIFO tasks are enqueued as those policies don't have the same preemption requirements. To prepare for that, let's account SCHED_RR tasks, we'll be able to deduce SCHED_FIFO tasks as well from it and the total RT tasks in the runqueue. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/sched/rt.c | 16 ++++++++++++++++ kernel/sched/sched.h | 1 + 2 files changed, 17 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8ec86abe0ea1..3f1fcffbb18f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1141,6 +1141,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se) return 1; } +static inline +unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se) +{ + struct rt_rq *group_rq = group_rt_rq(rt_se); + struct task_struct *tsk; + + if (group_rq) + return group_rq->rr_nr_running; + + tsk = rt_task_of(rt_se); + + return (tsk->policy == SCHED_RR) ? 1 : 0; +} + static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { @@ -1148,6 +1162,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) WARN_ON(!rt_prio(prio)); rt_rq->rt_nr_running += rt_se_nr_running(rt_se); + rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se); inc_rt_prio(rt_rq, prio); inc_rt_migration(rt_se, rt_rq); @@ -1160,6 +1175,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) WARN_ON(!rt_prio(rt_se_prio(rt_se))); WARN_ON(!rt_rq->rt_nr_running); rt_rq->rt_nr_running -= rt_se_nr_running(rt_se); + rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se); dec_rt_prio(rt_rq, rt_se_prio(rt_se)); dec_rt_migration(rt_se, rt_rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 10f16374df7f..f0abfce14044 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void) struct rt_rq { struct rt_prio_array active; unsigned int rt_nr_running; + unsigned int rr_nr_running; #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED struct { int curr; /* highest queued rt task prio */ From 76d92ac305f23cada3a9b3c48a7ccea5f71019cb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 17 Jul 2015 22:25:49 +0200 Subject: [PATCH 7/9] sched: Migrate sched to use new tick dependency mask model Instead of providing asynchronous checks for the nohz subsystem to verify sched tick dependency, migrate sched to the new mask. Everytime a task is enqueued or dequeued, we evaluate the state of the tick dependency on top of the policy of the tasks in the runqueue, by order of priority: SCHED_DEADLINE: Need the tick in order to periodically check for runtime SCHED_FIFO : Don't need the tick (no round-robin) SCHED_RR : Need the tick if more than 1 task of the same priority for round robin (simplified with checking if more than one SCHED_RR task no matter what priority). SCHED_NORMAL : Need the tick if more than 1 task for round-robin. We could optimize that further with one flag per sched policy on the tick dependency mask and perform only the checks relevant to the policy concerned by an enqueue/dequeue operation. Since the checks aren't based on the current task anymore, we could get rid of the task switch hook but it's still needed for posix cpu timers. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/sched.h | 3 --- kernel/sched/core.c | 35 +++++++++++++++++------------- kernel/sched/sched.h | 47 ++++++++++++++++++++++++++++------------ kernel/time/tick-sched.c | 5 ----- 4 files changed, 53 insertions(+), 37 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 307e48375f21..2b10348806d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2364,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { } #endif #ifdef CONFIG_NO_HZ_FULL -extern bool sched_can_stop_tick(void); extern u64 scheduler_tick_max_deferment(void); -#else -static inline bool sched_can_stop_tick(void) { return false; } #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7142feb4b92d..1fad82364ffe 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -701,31 +701,36 @@ static inline bool got_nohz_idle_kick(void) #endif /* CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL -bool sched_can_stop_tick(void) +bool sched_can_stop_tick(struct rq *rq) { + int fifo_nr_running; + + /* Deadline tasks, even if single, need the tick */ + if (rq->dl.dl_nr_running) + return false; + /* - * FIFO realtime policy runs the highest priority task. Other runnable - * tasks are of a lower priority. The scheduler tick does nothing. + * FIFO realtime policy runs the highest priority task (after DEADLINE). + * Other runnable tasks are of a lower priority. The scheduler tick + * isn't needed. */ - if (current->policy == SCHED_FIFO) + fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running; + if (fifo_nr_running) return true; /* * Round-robin realtime tasks time slice with other tasks at the same - * realtime priority. Is this task the only one at this priority? + * realtime priority. */ - if (current->policy == SCHED_RR) { - struct sched_rt_entity *rt_se = ¤t->rt; - - return list_is_singular(&rt_se->run_list); + if (rq->rt.rr_nr_running) { + if (rq->rt.rr_nr_running == 1) + return true; + else + return false; } - /* - * More than one running task need preemption. - * nr_running update is assumed to be visible - * after IPI is sent from wakers. - */ - if (this_rq()->nr_running > 1) + /* Normal multitasking need periodic preemption checks */ + if (rq->cfs.nr_running > 1) return false; return true; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f0abfce14044..4f0bca770108 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1279,6 +1279,35 @@ unsigned long to_ratio(u64 period, u64 runtime); extern void init_entity_runnable_average(struct sched_entity *se); +#ifdef CONFIG_NO_HZ_FULL +extern bool sched_can_stop_tick(struct rq *rq); + +/* + * Tick may be needed by tasks in the runqueue depending on their policy and + * requirements. If tick is needed, lets send the target an IPI to kick it out of + * nohz mode if necessary. + */ +static inline void sched_update_tick_dependency(struct rq *rq) +{ + int cpu; + + if (!tick_nohz_full_enabled()) + return; + + cpu = cpu_of(rq); + + if (!tick_nohz_full_cpu(cpu)) + return; + + if (sched_can_stop_tick(rq)) + tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); + else + tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); +} +#else +static inline void sched_update_tick_dependency(struct rq *rq) { } +#endif + static inline void add_nr_running(struct rq *rq, unsigned count) { unsigned prev_nr = rq->nr_running; @@ -1290,26 +1319,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count) if (!rq->rd->overload) rq->rd->overload = true; #endif - -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_cpu(rq->cpu)) { - /* - * Tick is needed if more than one task runs on a CPU. - * Send the target an IPI to kick it out of nohz mode. - * - * We assume that IPI implies full memory barrier and the - * new value of rq->nr_running is visible on reception - * from the target. - */ - tick_nohz_full_kick_cpu(rq->cpu); - } -#endif } + + sched_update_tick_dependency(rq); } static inline void sub_nr_running(struct rq *rq, unsigned count) { rq->nr_running -= count; + /* Check if we still need preemption */ + sched_update_tick_dependency(rq); } static inline void rq_last_tick_reset(struct rq *rq) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3f79def37ca9..f312c60c3ed2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -204,11 +204,6 @@ static bool can_stop_full_tick(struct tick_sched *ts) return false; } - if (!sched_can_stop_tick()) { - trace_tick_stop(0, TICK_DEP_MASK_SCHED); - return false; - } - if (!posix_cpu_timers_can_stop_tick(current)) { trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); return false; From b78783000d5cb7c5994e6742e1d1ce594bfea15b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 17 Jul 2015 22:25:49 +0200 Subject: [PATCH 8/9] posix-cpu-timers: Migrate to use new tick dependency mask model Instead of providing asynchronous checks for the nohz subsystem to verify posix cpu timers tick dependency, migrate the latter to the new mask. In order to keep track of the running timers and expose the tick dependency accordingly, we must probe the timers queuing and dequeuing on threads and process lists. Unfortunately it implies both task and signal level dependencies. We should be able to further optimize this and merge all that on the task level dependency, at the cost of a bit of complexity and may be overhead. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/posix-timers.h | 3 -- include/linux/tick.h | 2 -- kernel/time/posix-cpu-timers.c | 52 +++++++--------------------------- kernel/time/tick-sched.c | 7 +---- 4 files changed, 12 insertions(+), 52 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 907f3fd191ac..62d44c176071 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer); void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); - -bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk); - void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval); diff --git a/include/linux/tick.h b/include/linux/tick.h index 0e63db4bc662..21f73649a4dc 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -234,7 +234,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, } extern void tick_nohz_full_kick_cpu(int cpu); -extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else static inline int housekeeping_any_cpu(void) @@ -259,7 +258,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) { } static inline void tick_nohz_full_kick_cpu(int cpu) { } -static inline void tick_nohz_full_kick_all(void) { } static inline void __tick_nohz_task_switch(void) { } #endif diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index f5e86d282d52..1cafba860b08 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) return err; } - /* * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. * This is called from sys_timer_create() and do_cpu_nanosleep() with the @@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer) cputime_expires->sched_exp = exp; break; } + if (CPUCLOCK_PERTHREAD(timer->it_clock)) + tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); + else + tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); } } @@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock, return 0; } -#ifdef CONFIG_NO_HZ_FULL -static void nohz_kick_work_fn(struct work_struct *work) -{ - tick_nohz_full_kick_all(); -} - -static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn); - -/* - * We need the IPIs to be sent from sane process context. - * The posix cpu timers are always set with irqs disabled. - */ -static void posix_cpu_timer_kick_nohz(void) -{ - if (context_tracking_is_enabled()) - schedule_work(&nohz_kick_work); -} - -bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk) -{ - if (!task_cputime_zero(&tsk->cputime_expires)) - return false; - - /* Check if cputimer is running. This is accessed without locking. */ - if (READ_ONCE(tsk->signal->cputimer.running)) - return false; - - return true; -} -#else -static inline void posix_cpu_timer_kick_nohz(void) { } -#endif - /* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. @@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, sample_to_timespec(timer->it_clock, old_incr, &old->it_interval); } - if (!ret) - posix_cpu_timer_kick_nohz(); + return ret; } @@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk, __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } + if (task_cputime_zero(tsk_expires)) + tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); } static inline void stop_process_timers(struct signal_struct *sig) @@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig) /* Turn off cputimer->running. This is done without locking. */ WRITE_ONCE(cputimer->running, false); + tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); } static u32 onecputick; @@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) arm_timer(timer); unlock_task_sighand(p, &flags); - /* Kick full dynticks CPUs in case they need to tick on the new timer */ - posix_cpu_timer_kick_nohz(); out: timer->it_overrun_last = timer->it_overrun; timer->it_overrun = -1; @@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } if (!*newval) - goto out; + return; *newval += now; } @@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, tsk->signal->cputime_expires.virt_exp = *newval; break; } -out: - posix_cpu_timer_kick_nohz(); + + tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER); } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f312c60c3ed2..e4f2916e66a9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -204,11 +204,6 @@ static bool can_stop_full_tick(struct tick_sched *ts) return false; } - if (!posix_cpu_timers_can_stop_tick(current)) { - trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); - return false; - } - #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK /* * sched_clock_tick() needs us? @@ -270,7 +265,7 @@ void tick_nohz_full_kick_cpu(int cpu) * Kick all full dynticks CPUs in order to force these to re-evaluate * their dependency on the tick and restart it if necessary. */ -void tick_nohz_full_kick_all(void) +static void tick_nohz_full_kick_all(void) { int cpu; From 4f49b90abb4aca6fe677c95fc352fd0674d489bd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Jul 2015 17:03:52 +0200 Subject: [PATCH 9/9] sched-clock: Migrate to use new tick dependency mask model Instead of checking sched_clock_stable from the nohz subsystem to verify its tick dependency, migrate it to the new mask in order to include it to the all-in-one check. Reviewed-by: Chris Metcalf Cc: Christoph Lameter Cc: Chris Metcalf Cc: Ingo Molnar Cc: Luiz Capitulino Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- kernel/sched/clock.c | 5 +++++ kernel/time/tick-sched.c | 19 ------------------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index bc54e84675da..fedb967a9841 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -61,6 +61,7 @@ #include #include #include +#include /* * Scheduler clock - returns current time in nanosec units. @@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void) { if (!sched_clock_stable()) static_key_slow_inc(&__sched_clock_stable); + + tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } void set_sched_clock_stable(void) @@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work) /* XXX worry about clock continuity */ if (sched_clock_stable()) static_key_slow_dec(&__sched_clock_stable); + + tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); } static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e4f2916e66a9..969e6704c3c9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -204,25 +204,6 @@ static bool can_stop_full_tick(struct tick_sched *ts) return false; } -#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - /* - * sched_clock_tick() needs us? - * - * TODO: kick full dynticks CPUs when - * sched_clock_stable is set. - */ - if (!sched_clock_stable()) { - trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); - /* - * Don't allow the user to think they can get - * full NO_HZ with this machine. - */ - WARN_ONCE(tick_nohz_full_running, - "NO_HZ FULL will not work with unstable sched clock"); - return false; - } -#endif - return true; }