From 9390675af0835ae1d654d33bfcf16096028550ad Mon Sep 17 00:00:00 2001
From: Vincent Guittot
Date: Wed, 22 Jan 2014 08:45:34 +0100
Subject: [PATCH 1/4] Revert "sched: Fix sleep time double accounting in enqueue entity"

This reverts commit 282cf499f03ec1754b6c8c945c9674b02631fb0f.

With the current implementation, the load average statistics of a sched
entity change according to other activity on the CPU, even if this
activity happens between the running windows of the sched entity and
has no influence on the running duration of the task.

When a task wakes up on the same CPU, we currently update
last_runnable_update with the return value of __synchronize_entity_decay()
without updating runnable_avg_sum and runnable_avg_period accordingly.
In fact, we have to sync the load_contrib of the se with the rq's
blocked_load_contrib before removing it from the latter (with
__synchronize_entity_decay), but we must keep last_runnable_update
unchanged so that runnable_avg_sum/period are updated during the next
update_entity_load_avg().

Signed-off-by: Vincent Guittot
Signed-off-by: Peter Zijlstra
Reviewed-by: Ben Segall
Cc: pjt@google.com
Cc: alex.shi@linaro.org
Link: http://lkml.kernel.org/r/1390376734-6800-1-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/sched/fair.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b24b6cfde9aa..efe6457ac5c8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2356,13 +2356,7 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 		}
 		wakeup = 0;
 	} else {
-		/*
-		 * Task re-woke on same cpu (or else migrate_task_rq_fair()
-		 * would have made count negative); we must be careful to avoid
-		 * double-accounting blocked time after synchronizing decays.
-		 */
-		se->avg.last_runnable_update += __synchronize_entity_decay(se)
-							<< 20;
+		__synchronize_entity_decay(se);
 	}
 
 	/* migrated tasks did not contribute to our blocked load */
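Note (illustration only, not part of the patch series): __synchronize_entity_decay() ages a blocked entity's load contribution according to how long the task slept; with ~1 ms periods and y^32 == 1/2, the contribution roughly halves for every 32 ms of blocked time, while the revert above keeps last_runnable_update untouched so the sleep window itself is still folded into runnable_avg_sum/period on the next update_entity_load_avg(). A stand-alone toy model of that geometric decay, with made-up values and none of the kernel's y^n lookup tables:

/*
 * Illustration only (user-space, not kernel code): the shape of the decay
 * applied to a blocked entity's load contribution.  The real kernel decays
 * by y per ~1 ms period with y^32 == 1/2 using lookup tables; this toy
 * keeps only the halving per 32-period "half-life" and ignores the
 * fractional part.
 */
#include <inttypes.h>
#include <stdio.h>

static uint64_t toy_decay_contrib(uint64_t contrib, unsigned int half_lives)
{
	/* Shifting by >= 64 is undefined behaviour, so clamp explicitly. */
	if (half_lives >= 64)
		return 0;
	return contrib >> half_lives;
}

int main(void)
{
	uint64_t contrib = 1024;	/* hypothetical load_avg_contrib */

	/*
	 * A task blocked for roughly 96 ms (three half-lives) comes back
	 * with about one eighth of the contribution it went to sleep with.
	 */
	printf("decayed contrib: %" PRIu64 "\n", toy_decay_contrib(contrib, 3));
	return 0;
}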
McKenney" Link: http://lkml.kernel.org/r/20140122102435.GH31570@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 59749fc48328..de83b4eb1642 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -134,7 +134,6 @@ do { \ #undef preempt_check_resched #endif -#ifdef CONFIG_PREEMPT #define preempt_set_need_resched() \ do { \ set_preempt_need_resched(); \ @@ -144,10 +143,6 @@ do { \ if (tif_need_resched()) \ set_preempt_need_resched(); \ } while (0) -#else -#define preempt_set_need_resched() do { } while (0) -#define preempt_fold_need_resched() do { } while (0) -#endif #ifdef CONFIG_PREEMPT_NOTIFIERS From d375b4e0fa3771343b370be0d876a1963c02e0a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jan 2014 12:59:18 +0100 Subject: [PATCH 3/4] sched/clock: Fixup early initialization The code would assume sched_clock_stable() and switch to !stable later, this switch brings a discontinuity in time. The discontinuity on switching from stable to unstable was always present, but previously we would set stable/unstable before initializing TSC and usually stick to the one we start out with. So the static_key bits brought an extra switch where there previously wasn't one. Things are further complicated by the fact that we cannot use static_key as early as we usually call set_sched_clock_stable(). Fix things by tracking the stable state in a regular variable and only set the static_key to the right state on sched_clock_init(), which is ran right after late_time_init->tsc_init(). Before this we would not be using the TSC anyway. Reported-and-Tested-by: Sasha Levin Reported-by: dyoung@redhat.com Fixes: 35af99e646c7 ("sched/clock, x86: Use a static_key for sched_clock_stable") Cc: jacob.jun.pan@linux.intel.com Cc: Mike Galbraith Cc: hpa@zytor.com Cc: paulmck@linux.vnet.ibm.com Cc: John Stultz Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: lenb@kernel.org Cc: rjw@rjwysocki.net Cc: Eliezer Tamir Cc: rui.zhang@intel.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140122115918.GG3694@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/clock.c | 53 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 6bd6a6731b21..43c2bcc35761 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -77,35 +77,50 @@ __read_mostly int sched_clock_running; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static struct static_key __sched_clock_stable = STATIC_KEY_INIT; +static int __sched_clock_stable_early; int sched_clock_stable(void) { - if (static_key_false(&__sched_clock_stable)) - return false; - return true; + return static_key_false(&__sched_clock_stable); +} + +static void __set_sched_clock_stable(void) +{ + if (!sched_clock_stable()) + static_key_slow_inc(&__sched_clock_stable); } void set_sched_clock_stable(void) { - if (!sched_clock_stable()) - static_key_slow_dec(&__sched_clock_stable); + __sched_clock_stable_early = 1; + + smp_mb(); /* matches sched_clock_init() */ + + if (!sched_clock_running) + return; + + __set_sched_clock_stable(); } static void __clear_sched_clock_stable(struct work_struct *work) { /* XXX worry about clock continuity */ if (sched_clock_stable()) - static_key_slow_inc(&__sched_clock_stable); + static_key_slow_dec(&__sched_clock_stable); } static DECLARE_WORK(sched_clock_work, 
From 5e3c1afd4587e70c201bf7224b51f747c9a3dfa8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 22 Jan 2014 22:08:14 +0100
Subject: [PATCH 4/4] sched/x86/tsc: Initialize multiplier to 0

Since we keep the clock value linearly continuous on frequency change,
make sure the initial multiplier is 0, such that our initial value is 0.
Without this we compute the initial value at whatever the TSC has
managed to reach since power-on.

Reported-and-Tested-by: Markus Trippelsdorf
Fixes: 20d1c86a57762 ("sched/clock, x86: Rewrite cyc2ns() to avoid the need to disable IRQs")
Cc: lenb@kernel.org
Cc: rjw@rjwysocki.net
Cc: Eliezer Tamir
Cc: rui.zhang@intel.com
Cc: jacob.jun.pan@linux.intel.com
Cc: Mike Galbraith
Cc: hpa@zytor.com
Cc: paulmck@linux.vnet.ibm.com
Cc: John Stultz
Cc: Andy Lutomirski
Cc: Arjan van de Ven
Cc: Sasha Levin
Cc: dyoung@redhat.com
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20140123094804.GP30183@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a3acbac2ee72..19e5adb49a27 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -180,7 +180,7 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
 
 static void cyc2ns_data_init(struct cyc2ns_data *data)
 {
-	data->cyc2ns_mul = 1U << CYC2NS_SCALE_FACTOR;
+	data->cyc2ns_mul = 0;
	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
	data->cyc2ns_offset = 0;
	data->__count = 0;
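Note for context on the last patch (simplified sketch, not the kernel implementation): the x86 scheduler clock converts TSC cycles to nanoseconds as roughly ns = (cycles * cyc2ns_mul) >> cyc2ns_shift, plus a per-CPU offset chosen to keep the clock continuous across frequency and calibration changes; the kernel does this with a helper like mul_u64_u32_shr() and a lockless per-CPU data structure. With cyc2ns_mul forced to 0 until tsc_init() computes the real scale factor, the pre-calibration clock reads as the offset (initially 0) instead of whatever cycle count the TSC has reached since power-on. A toy version of that fixed-point conversion, with illustrative constants:

/*
 * Simplified model of the cyc2ns conversion the patch touches:
 *   ns = (cycles * mul) >> shift, plus an offset used to keep the clock
 * continuous across rescaling.  Constants and the 128-bit handling are
 * illustrative only.
 */
#include <stdint.h>

struct toy_cyc2ns_data {
	uint32_t mul;		/* 0 until calibration, as in the patch */
	uint32_t shift;		/* fixed-point scale factor, e.g. 10 */
	uint64_t offset;	/* keeps the clock continuous on rescale */
};

uint64_t toy_cycles_to_ns(const struct toy_cyc2ns_data *d, uint64_t cycles)
{
	/*
	 * 64x32 -> up-to-96-bit product, done in 128 bits for simplicity
	 * (GCC/Clang extension); overflow handling is ignored here.
	 */
	return (uint64_t)(((unsigned __int128)cycles * d->mul) >> d->shift)
		+ d->offset;
}

With mul == 0 this returns just the offset for any cycle count, which is exactly why forcing the initial multiplier to 0 makes the early clock start at 0 rather than at the raw boot-time TSC value.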