From c73464b1c8434ad4cbfd5369c3e724f3e8ffe5a4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 18:06:56 +0200 Subject: [PATCH] sched/core: Fix trace_sched_switch() __trace_sched_switch_state() is the last remaining PREEMPT_ACTIVE user, move trace_sched_switch() from prepare_task_switch() to __schedule() and propagate the @preempt argument. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Steven Rostedt Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 24 ++++++++++-------------- kernel/sched/core.c | 2 +- kernel/trace/ftrace.c | 2 +- kernel/trace/trace_sched_switch.c | 3 ++- kernel/trace/trace_sched_wakeup.c | 2 +- 5 files changed, 15 insertions(+), 18 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 539d6bc3216a..9b90c57517a9 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -104,22 +104,17 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, TP_ARGS(p)); #ifdef CREATE_TRACE_POINTS -static inline long __trace_sched_switch_state(struct task_struct *p) +static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) { - long state = p->state; - -#ifdef CONFIG_PREEMPT #ifdef CONFIG_SCHED_DEBUG BUG_ON(p != current); #endif /* CONFIG_SCHED_DEBUG */ - /* - * For all intents and purposes a preempted task is a running task. - */ - if (preempt_count() & PREEMPT_ACTIVE) - state = TASK_RUNNING | TASK_STATE_MAX; -#endif /* CONFIG_PREEMPT */ - return state; + /* + * Preemption ignores task state, therefore preempted tasks are always + * RUNNING (we will not have dequeued if state != RUNNING). + */ + return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state; } #endif /* CREATE_TRACE_POINTS */ @@ -128,10 +123,11 @@ static inline long __trace_sched_switch_state(struct task_struct *p) */ TRACE_EVENT(sched_switch, - TP_PROTO(struct task_struct *prev, + TP_PROTO(bool preempt, + struct task_struct *prev, struct task_struct *next), - TP_ARGS(prev, next), + TP_ARGS(preempt, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) @@ -147,7 +143,7 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; - __entry->prev_state = __trace_sched_switch_state(prev); + __entry->prev_state = __trace_sched_switch_state(preempt, prev); memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0a71f89fb3fc..cfad7f5f74f8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2470,7 +2470,6 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { - trace_sched_switch(prev, next); sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); @@ -3132,6 +3131,7 @@ static void __sched __schedule(bool preempt) rq->curr = next; ++*switch_count; + trace_sched_switch(preempt, prev, next); rq = context_switch(rq, prev, next); /* unlocks the rq */ cpu = cpu_of(rq); } else { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b0623ac785a2..00611e95a8ee 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5697,7 +5697,7 @@ free: } static void -ftrace_graph_probe_sched_switch(void *ignore, +ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *prev, struct task_struct *next) { unsigned long long timestamp; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index f270088e9929..4c896a0101bd 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -16,7 +16,8 @@ static int sched_ref; static DEFINE_MUTEX(sched_register_mutex); static void -probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) +probe_sched_switch(void *ignore, bool preempt, + struct task_struct *prev, struct task_struct *next) { if (unlikely(!sched_ref)) return; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 12cbe77b4136..4bcfbac289ff 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -420,7 +420,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, } static void notrace -probe_wakeup_sched_switch(void *ignore, +probe_wakeup_sched_switch(void *ignore, bool preempt, struct task_struct *prev, struct task_struct *next) { struct trace_array_cpu *data;