remarkable-linux/kernel/trace/trace_clock.c
Steven Rostedt 5168ae50a6 tracing: Remove ftrace_preempt_disable/enable
The ftrace_preempt_disable/enable functions were to address a
recursive race caused by the function tracer. The function tracer
traces all functions which makes it easily susceptible to recursion.
One area was preempt_enable(). This would call the scheduler and
the schedulre would call the function tracer and loop.
(So was it thought).

The ftrace_preempt_disable/enable was made to protect against recursion
inside the scheduler by storing the NEED_RESCHED flag. If it was
set before the ftrace_preempt_disable() it would not call schedule
on ftrace_preempt_enable(), thinking that if it was set before then
it would have already scheduled unless it was already in the scheduler.

This worked fine except in the case of SMP, where another task would set
the NEED_RESCHED flag for a task on another CPU, and then kick off an
IPI to trigger it. This could cause the NEED_RESCHED to be saved at
ftrace_preempt_disable() but the IPI to arrive in the the preempt
disabled section. The ftrace_preempt_enable() would not call the scheduler
because the flag was already set before entring the section.

This bug would cause a missed preemption check and cause lower latencies.

Investigating further, I found that the recusion caused by the function
tracer was not due to schedule(), but due to preempt_schedule(). Now
that preempt_schedule is completely annotated with notrace, the recusion
no longer is an issue.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2010-06-03 19:32:38 -04:00

116 lines
2.7 KiB
C

/*
* tracing clocks
*
* Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* Implements 3 trace clock variants, with differing scalability/precision
* tradeoffs:
*
* - local: CPU-local trace clock
* - medium: scalable global clock with some jitter
* - global: globally monotonic, serialized clock
*
* Tracer plugins will chose a default from these clocks.
*/
#include <linux/spinlock.h>
#include <linux/irqflags.h>
#include <linux/hardirq.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/ktime.h>
#include <linux/trace_clock.h>
#include "trace.h"
/*
* trace_clock_local(): the simplest and least coherent tracing clock.
*
* Useful for tracing that does not cross to other CPUs nor
* does it go through idle events.
*/
u64 notrace trace_clock_local(void)
{
u64 clock;
/*
* sched_clock() is an architecture implemented, fast, scalable,
* lockless clock. It is not guaranteed to be coherent across
* CPUs, nor across CPU idle events.
*/
preempt_disable_notrace();
clock = sched_clock();
preempt_enable_notrace();
return clock;
}
/*
* trace_clock(): 'inbetween' trace clock. Not completely serialized,
* but not completely incorrect when crossing CPUs either.
*
* This is based on cpu_clock(), which will allow at most ~1 jiffy of
* jitter between CPUs. So it's a pretty scalable clock, but there
* can be offsets in the trace data.
*/
u64 notrace trace_clock(void)
{
return cpu_clock(raw_smp_processor_id());
}
/*
* trace_clock_global(): special globally coherent trace clock
*
* It has higher overhead than the other trace clocks but is still
* an order of magnitude faster than GTOD derived hardware clocks.
*
* Used by plugins that need globally coherent timestamps.
*/
/* keep prev_time and lock in the same cacheline. */
static struct {
u64 prev_time;
arch_spinlock_t lock;
} trace_clock_struct ____cacheline_aligned_in_smp =
{
.lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
};
u64 notrace trace_clock_global(void)
{
unsigned long flags;
int this_cpu;
u64 now;
local_irq_save(flags);
this_cpu = raw_smp_processor_id();
now = cpu_clock(this_cpu);
/*
* If in an NMI context then dont risk lockups and return the
* cpu_clock() time:
*/
if (unlikely(in_nmi()))
goto out;
arch_spin_lock(&trace_clock_struct.lock);
/*
* TODO: if this happens often then maybe we should reset
* my_scd->clock to prev_time+1, to make sure
* we start ticking with the local clock from now on?
*/
if ((s64)(now - trace_clock_struct.prev_time) < 0)
now = trace_clock_struct.prev_time + 1;
trace_clock_struct.prev_time = now;
arch_spin_unlock(&trace_clock_struct.lock);
out:
local_irq_restore(flags);
return now;
}