From f5abaa1bfc3dbf26d19d3513f39279ca369f8d65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Jun 2013 11:44:44 -0400 Subject: [PATCH 1/7] tracing: Add DEFINE_EVENT_FN() macro Each TRACE_EVENT() adds several helper functions. If two or more trace events share the same structure and print format, they can also share most of these helper functions and save a lot of space from duplicate code. This is why the DECLARE_EVENT_CLASS() and DEFINE_EVENT() were created. Some events require a trigger to be called at registering and unregistering of the event and to do so they use TRACE_EVENT_FN(). If multiple events require a trigger, they currently have no choice but to use TRACE_EVENT_FN() as there's no DEFINE_EVENT_FN() available. This unfortunately causes a lot of wasted duplicate code created. By adding a DEFINE_EVENT_FN(), these events can still use a DECLARE_EVENT_CLASS() and then define their own triggers. Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/51C3236C.8030508@hds.com Signed-off-by: Seiji Aguchi Signed-off-by: H. Peter Anvin --- include/linux/tracepoint.h | 2 ++ include/trace/define_trace.h | 5 +++++ include/trace/ftrace.h | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index f8e084d0fc77..ebeab360d851 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -378,6 +378,8 @@ static inline void tracepoint_synchronize_unregister(void) #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_CONDITION(template, name, proto, \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 1905ca8dd399..02e1003568a4 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -44,6 +44,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) +#undef DEFINE_EVENT_FN +#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg) + #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_TRACE(name) @@ -91,6 +95,7 @@ #undef TRACE_EVENT_CONDITION #undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT +#undef DEFINE_EVENT_FN #undef DEFINE_EVENT_PRINT #undef DEFINE_EVENT_CONDITION #undef TRACE_HEADER_MULTI_READ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 19edd7facaa1..d615f78cc6b6 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -71,6 +71,10 @@ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) event_##name +#undef DEFINE_EVENT_FN +#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) From eddc0e922a3530e0f22cef170229bcae3a7d5e31 Mon Sep 17 00:00:00 2001 From: Seiji Aguchi Date: Thu, 20 Jun 2013 11:45:17 -0400 Subject: [PATCH 2/7] x86, trace: Introduce entering/exiting_irq() When implementing tracepoints in interrupt handers, if the tracepoints are simply added in the performance sensitive path of interrupt handers, it may cause potential performance problem due to the time penalty. To solve the problem, an idea is to prepare non-trace/trace irq handers and switch their IDTs at the enabling/disabling time. So, let's introduce entering_irq()/exiting_irq() for pre/post- processing of each irq handler. A way to use them is as follows. Non-trace irq handler: smp_irq_handler() { entering_irq(); /* pre-processing of this handler */ __smp_irq_handler(); /* * common logic between non-trace and trace handlers * in a vector. */ exiting_irq(); /* post-processing of this handler */ } Trace irq_handler: smp_trace_irq_handler() { entering_irq(); /* pre-processing of this handler */ trace_irq_entry(); /* tracepoint for irq entry */ __smp_irq_handler(); /* * common logic between non-trace and trace handlers * in a vector. */ trace_irq_exit(); /* tracepoint for irq exit */ exiting_irq(); /* post-processing of this handler */ } If tracepoints can place outside entering_irq()/exiting_irq() as follows, it looks cleaner. smp_trace_irq_handler() { trace_irq_entry(); smp_irq_handler(); trace_irq_exit(); } But it doesn't work. The problem is with irq_enter/exit() being called. They must be called before trace_irq_enter/exit(), because of the rcu_irq_enter() must be called before any tracepoints are used, as tracepoints use rcu to synchronize. As a possible alternative, we may be able to call irq_enter() first as follows if irq_enter() can nest. smp_trace_irq_hander() { irq_entry(); trace_irq_entry(); smp_irq_handler(); trace_irq_exit(); irq_exit(); } But it doesn't work, either. If irq_enter() is nested, it may have a time penalty because it has to check if it was already called or not. The time penalty is not desired in performance sensitive paths even if it is tiny. Signed-off-by: Seiji Aguchi Link: http://lkml.kernel.org/r/51C3238D.9040706@hds.com Signed-off-by: H. Peter Anvin Cc: Steven Rostedt --- arch/x86/include/asm/apic.h | 27 +++++++++++++++++ arch/x86/kernel/apic/apic.c | 33 ++++++++++++--------- arch/x86/kernel/cpu/mcheck/therm_throt.c | 14 +++++---- arch/x86/kernel/cpu/mcheck/threshold.c | 14 +++++---- arch/x86/kernel/irq.c | 18 +++++------- arch/x86/kernel/irq_work.c | 14 +++++++-- arch/x86/kernel/smp.c | 37 ++++++++++++++++++------ 7 files changed, 110 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 338803422239..f8119b582c3c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -12,6 +12,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -687,5 +688,31 @@ extern int default_check_phys_apicid_present(int phys_apicid); #endif #endif /* CONFIG_X86_LOCAL_APIC */ +extern void irq_enter(void); +extern void irq_exit(void); + +static inline void entering_irq(void) +{ + irq_enter(); + exit_idle(); +} + +static inline void entering_ack_irq(void) +{ + ack_APIC_irq(); + entering_irq(); +} + +static inline void exiting_irq(void) +{ + irq_exit(); +} + +static inline void exiting_ack_irq(void) +{ + irq_exit(); + /* Ack only at the end to avoid potential reentry */ + ack_APIC_irq(); +} #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 904611bf0e5a..59ee76fe1c53 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -919,17 +919,14 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. - */ - ack_APIC_irq(); - /* + * * update_process_times() expects us to have done irq_enter(). * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ - irq_enter(); - exit_idle(); + entering_ack_irq(); local_apic_timer_interrupt(); - irq_exit(); + exiting_irq(); set_irq_regs(old_regs); } @@ -1907,12 +1904,10 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -void smp_spurious_interrupt(struct pt_regs *regs) +static inline void __smp_spurious_interrupt(void) { u32 v; - irq_enter(); - exit_idle(); /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... @@ -1927,13 +1922,19 @@ void smp_spurious_interrupt(struct pt_regs *regs) /* see sw-dev-man vol 3, chapter 7.4.13.5 */ pr_info("spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); - irq_exit(); +} + +void smp_spurious_interrupt(struct pt_regs *regs) +{ + entering_irq(); + __smp_spurious_interrupt(); + exiting_irq(); } /* * This interrupt should never happen with our APIC/SMP architecture */ -void smp_error_interrupt(struct pt_regs *regs) +static inline void __smp_error_interrupt(struct pt_regs *regs) { u32 v0, v1; u32 i = 0; @@ -1948,8 +1949,6 @@ void smp_error_interrupt(struct pt_regs *regs) "Illegal register address", /* APIC Error Bit 7 */ }; - irq_enter(); - exit_idle(); /* First tickle the hardware, only then report what went on. -- REW */ v0 = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); @@ -1970,7 +1969,13 @@ void smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT "\n"); - irq_exit(); +} + +void smp_error_interrupt(struct pt_regs *regs) +{ + entering_irq(); + __smp_error_interrupt(regs); + exiting_irq(); } /** diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 47a1870279aa..f6b35f2a6a37 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -378,15 +378,17 @@ static void unexpected_thermal_interrupt(void) static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) +static inline void __smp_thermal_interrupt(void) { - irq_enter(); - exit_idle(); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); +} + +asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) +{ + entering_irq(); + __smp_thermal_interrupt(); + exiting_ack_irq(); } /* Thermal monitoring depends on APIC, ACPI and clock modulation */ diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index aa578cadb940..610cd98d6ef9 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -17,13 +17,15 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -asmlinkage void smp_threshold_interrupt(void) +static inline void __smp_threshold_interrupt(void) { - irq_enter(); - exit_idle(); inc_irq_stat(irq_threshold_count); mce_threshold_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); +} + +asmlinkage void smp_threshold_interrupt(void) +{ + entering_irq(); + __smp_threshold_interrupt(); + exiting_ack_irq(); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index ac0631d8996f..e3b8df1754cc 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -204,23 +204,21 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) /* * Handler for X86_PLATFORM_IPI_VECTOR. */ -void smp_x86_platform_ipi(struct pt_regs *regs) +void __smp_x86_platform_ipi(void) { - struct pt_regs *old_regs = set_irq_regs(regs); - - ack_APIC_irq(); - - irq_enter(); - - exit_idle(); - inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) x86_platform_ipi_callback(); +} - irq_exit(); +void smp_x86_platform_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + entering_ack_irq(); + __smp_x86_platform_ipi(); + exiting_irq(); set_irq_regs(old_regs); } diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index ca8f703a1e70..074d46fdbd1f 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -9,13 +9,23 @@ #include #include -void smp_irq_work_interrupt(struct pt_regs *regs) +static inline void irq_work_entering_irq(void) { irq_enter(); ack_APIC_irq(); +} + +static inline void __smp_irq_work_interrupt(void) +{ inc_irq_stat(apic_irq_work_irqs); irq_work_run(); - irq_exit(); +} + +void smp_irq_work_interrupt(struct pt_regs *regs) +{ + irq_work_entering_irq(); + __smp_irq_work_interrupt(); + exiting_irq(); } void arch_irq_work_raise(void) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 48d2b7ded422..d85837574a79 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -249,32 +249,51 @@ finish: /* * Reschedule call back. */ +static inline void __smp_reschedule_interrupt(void) +{ + inc_irq_stat(irq_resched_count); + scheduler_ipi(); +} + void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); - inc_irq_stat(irq_resched_count); - scheduler_ipi(); + __smp_reschedule_interrupt(); /* * KVM uses this interrupt to force a cpu out of guest mode */ } -void smp_call_function_interrupt(struct pt_regs *regs) +static inline void call_function_entering_irq(void) { ack_APIC_irq(); irq_enter(); +} + +static inline void __smp_call_function_interrupt(void) +{ generic_smp_call_function_interrupt(); inc_irq_stat(irq_call_count); - irq_exit(); +} + +void smp_call_function_interrupt(struct pt_regs *regs) +{ + call_function_entering_irq(); + __smp_call_function_interrupt(); + exiting_irq(); +} + +static inline void __smp_call_function_single_interrupt(void) +{ + generic_smp_call_function_single_interrupt(); + inc_irq_stat(irq_call_count); } void smp_call_function_single_interrupt(struct pt_regs *regs) { - ack_APIC_irq(); - irq_enter(); - generic_smp_call_function_single_interrupt(); - inc_irq_stat(irq_call_count); - irq_exit(); + call_function_entering_irq(); + __smp_call_function_single_interrupt(); + exiting_irq(); } static int __init nonmi_ipi_setup(char *str) From 629f4f9d59a27d8e58aa612e886e6a9a63ea7aeb Mon Sep 17 00:00:00 2001 From: Seiji Aguchi Date: Thu, 20 Jun 2013 11:45:44 -0400 Subject: [PATCH 3/7] x86: Rename variables for debugging Rename variables for debugging to describe meaning of them precisely. Also, introduce a generic way to switch IDT by checking a current state, debug on/off. Signed-off-by: Seiji Aguchi Link: http://lkml.kernel.org/r/51C323A8.7050905@hds.com Signed-off-by: H. Peter Anvin Cc: Steven Rostedt --- arch/x86/include/asm/desc.h | 47 +++++++++++++++++++++++++++++++++--- arch/x86/kernel/cpu/common.c | 16 ++++++------ arch/x86/kernel/head_64.S | 2 +- arch/x86/kernel/traps.c | 2 +- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 8bf1c06070d5..af290b8f124a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -36,8 +36,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in extern struct desc_ptr idt_descr; extern gate_desc idt_table[]; -extern struct desc_ptr nmi_idt_descr; -extern gate_desc nmi_idt_table[]; +extern struct desc_ptr debug_idt_descr; +extern gate_desc debug_idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; @@ -316,7 +316,7 @@ static inline void set_nmi_gate(int gate, void *addr) gate_desc s; pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); - write_idt_entry(nmi_idt_table, gate, &s); + write_idt_entry(debug_idt_table, gate, &s); } #endif @@ -405,4 +405,45 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); } +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU(u32, debug_idt_ctr); +static inline bool is_debug_idt_enabled(void) +{ + if (this_cpu_read(debug_idt_ctr)) + return true; + + return false; +} + +static inline void load_debug_idt(void) +{ + load_idt((const struct desc_ptr *)&debug_idt_descr); +} +#else +static inline bool is_debug_idt_enabled(void) +{ + return false; +} + +static inline void load_debug_idt(void) +{ +} +#endif + +/* + * the load_current_idt() is called with interrupt disabled by local_irq_save() + * to avoid races. That way the IDT will always be set back to the expected + * descriptor. + */ +static inline void load_current_idt(void) +{ + unsigned long flags; + + local_irq_save(flags); + if (is_debug_idt_enabled()) + load_debug_idt(); + else + load_idt((const struct desc_ptr *)&idt_descr); + local_irq_restore(flags); +} #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22018f70a671..8f6a0f909d6f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1071,8 +1071,8 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; -struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, - (unsigned long) nmi_idt_table }; +struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, + (unsigned long) debug_idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); @@ -1148,20 +1148,20 @@ int is_debug_stack(unsigned long addr) addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); } -static DEFINE_PER_CPU(u32, debug_stack_use_ctr); +DEFINE_PER_CPU(u32, debug_idt_ctr); void debug_stack_set_zero(void) { - this_cpu_inc(debug_stack_use_ctr); - load_idt((const struct desc_ptr *)&nmi_idt_descr); + this_cpu_inc(debug_idt_ctr); + load_current_idt(); } void debug_stack_reset(void) { - if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) + if (WARN_ON(!this_cpu_read(debug_idt_ctr))) return; - if (this_cpu_dec_return(debug_stack_use_ctr) == 0) - load_idt((const struct desc_ptr *)&idt_descr); + if (this_cpu_dec_return(debug_idt_ctr) == 0) + load_current_idt(); } #else /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 321d65ebaffe..84fb779d5b74 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -518,7 +518,7 @@ ENTRY(idt_table) .skip IDT_ENTRIES * 16 .align L1_CACHE_BYTES -ENTRY(nmi_idt_table) +ENTRY(debug_idt_table) .skip IDT_ENTRIES * 16 __PAGE_ALIGNED_BSS diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 772e2a846dec..d27182d6ed2a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -785,7 +785,7 @@ void __init trap_init(void) x86_init.irqs.trap_init(); #ifdef CONFIG_X86_64 - memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); + memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); set_nmi_gate(X86_TRAP_DB, &debug); set_nmi_gate(X86_TRAP_BP, &int3); #endif From cf910e83ae23692fdeefc7e506e504c4c468d38a Mon Sep 17 00:00:00 2001 From: Seiji Aguchi Date: Thu, 20 Jun 2013 11:46:53 -0400 Subject: [PATCH 4/7] x86, trace: Add irq vector tracepoints [Purpose of this patch] As Vaibhav explained in the thread below, tracepoints for irq vectors are useful. http://www.spinics.net/lists/mm-commits/msg85707.html The current interrupt traces from irq_handler_entry and irq_handler_exit provide when an interrupt is handled. They provide good data about when the system has switched to kernel space and how it affects the currently running processes. There are some IRQ vectors which trigger the system into kernel space, which are not handled in generic IRQ handlers. Tracing such events gives us the information about IRQ interaction with other system events. The trace also tells where the system is spending its time. We want to know which cores are handling interrupts and how they are affecting other processes in the system. Also, the trace provides information about when the cores are idle and which interrupts are changing that state. On the other hand, my usecase is tracing just local timer event and getting a value of instruction pointer. I suggested to add an argument local timer event to get instruction pointer before. But there is another way to get it with external module like systemtap. So, I don't need to add any argument to irq vector tracepoints now. [Patch Description] Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events. But there is an above use case to trace specific irq_vector rather than tracing all events. In this case, we are concerned about overhead due to unwanted events. So, add following tracepoints instead of introducing irq_vector_entry/exit. so that we can enable them independently. - local_timer_vector - reschedule_vector - call_function_vector - call_function_single_vector - irq_work_entry_vector - error_apic_vector - thermal_apic_vector - threshold_apic_vector - spurious_apic_vector - x86_platform_ipi_vector Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty makes a zero when tracepoints are disabled. Detailed explanations are as follows. - Create trace irq handlers with entering_irq()/exiting_irq(). - Create a new IDT, trace_idt_table, at boot time by adding a logic to _set_gate(). It is just a copy of original idt table. - Register the new handlers for tracpoints to the new IDT by introducing macros to alloc_intr_gate() called at registering time of irq_vector handlers. - Add checking, whether irq vector tracing is on/off, into load_current_idt(). This has to be done below debug checking for these reasons. - Switching to debug IDT may be kicked while tracing is enabled. - On the other hands, switching to trace IDT is kicked only when debugging is disabled. In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being used for other purposes. Signed-off-by: Seiji Aguchi Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com Signed-off-by: H. Peter Anvin Cc: Steven Rostedt --- arch/x86/include/asm/desc.h | 72 +++++++++++++++- arch/x86/include/asm/entry_arch.h | 8 +- arch/x86/include/asm/hw_irq.h | 17 ++++ arch/x86/include/asm/mshyperv.h | 3 + arch/x86/include/asm/trace/irq_vectors.h | 104 +++++++++++++++++++++++ arch/x86/include/asm/uv/uv_bau.h | 3 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/apic/Makefile | 1 + arch/x86/kernel/apic/apic.c | 42 +++++++++ arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/cpu/mcheck/therm_throt.c | 10 +++ arch/x86/kernel/cpu/mcheck/threshold.c | 10 +++ arch/x86/kernel/entry_32.S | 12 ++- arch/x86/kernel/entry_64.S | 31 +++++-- arch/x86/kernel/head_64.S | 6 ++ arch/x86/kernel/irq.c | 13 +++ arch/x86/kernel/irq_work.c | 10 +++ arch/x86/kernel/smp.c | 30 +++++++ arch/x86/kernel/tracepoint.c | 57 +++++++++++++ include/xen/events.h | 3 + 20 files changed, 422 insertions(+), 15 deletions(-) create mode 100644 arch/x86/include/asm/trace/irq_vectors.h create mode 100644 arch/x86/kernel/tracepoint.c diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index af290b8f124a..1377ecb29d8d 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -320,6 +320,19 @@ static inline void set_nmi_gate(int gate, void *addr) } #endif +#ifdef CONFIG_TRACING +extern struct desc_ptr trace_idt_descr; +extern gate_desc trace_idt_table[]; +static inline void write_trace_idt_entry(int entry, const gate_desc *gate) +{ + write_idt_entry(trace_idt_table, entry, gate); +} +#else +static inline void write_trace_idt_entry(int entry, const gate_desc *gate) +{ +} +#endif + static inline void _set_gate(int gate, unsigned type, void *addr, unsigned dpl, unsigned ist, unsigned seg) { @@ -331,6 +344,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr, * setup time */ write_idt_entry(idt_table, gate, &s); + write_trace_idt_entry(gate, &s); } /* @@ -360,12 +374,39 @@ static inline void alloc_system_vector(int vector) } } -static inline void alloc_intr_gate(unsigned int n, void *addr) +#ifdef CONFIG_TRACING +static inline void trace_set_intr_gate(unsigned int gate, void *addr) +{ + gate_desc s; + + pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); + write_idt_entry(trace_idt_table, gate, &s); +} + +static inline void __trace_alloc_intr_gate(unsigned int n, void *addr) +{ + trace_set_intr_gate(n, addr); +} +#else +static inline void trace_set_intr_gate(unsigned int gate, void *addr) +{ +} + +#define __trace_alloc_intr_gate(n, addr) +#endif + +static inline void __alloc_intr_gate(unsigned int n, void *addr) { - alloc_system_vector(n); set_intr_gate(n, addr); } +#define alloc_intr_gate(n, addr) \ + do { \ + alloc_system_vector(n); \ + __alloc_intr_gate(n, addr); \ + __trace_alloc_intr_gate(n, trace_##addr); \ + } while (0) + /* * This routine sets up an interrupt gate at directory privilege level 3. */ @@ -430,6 +471,31 @@ static inline void load_debug_idt(void) } #endif +#ifdef CONFIG_TRACING +extern atomic_t trace_idt_ctr; +static inline bool is_trace_idt_enabled(void) +{ + if (atomic_read(&trace_idt_ctr)) + return true; + + return false; +} + +static inline void load_trace_idt(void) +{ + load_idt((const struct desc_ptr *)&trace_idt_descr); +} +#else +static inline bool is_trace_idt_enabled(void) +{ + return false; +} + +static inline void load_trace_idt(void) +{ +} +#endif + /* * the load_current_idt() is called with interrupt disabled by local_irq_save() * to avoid races. That way the IDT will always be set back to the expected @@ -442,6 +508,8 @@ static inline void load_current_idt(void) local_irq_save(flags); if (is_debug_idt_enabled()) load_debug_idt(); + else if (is_trace_idt_enabled()) + load_trace_idt(); else load_idt((const struct desc_ptr *)&idt_descr); local_irq_restore(flags); diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 9bd4ecac72be..dc5fa661465f 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -13,14 +13,16 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) -BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) -BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) +BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR, + smp_irq_move_cleanup_interrupt) +BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt) #endif BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_HAVE_KVM -BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR, + smp_kvm_posted_intr_ipi) #endif /* diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1da97efad08a..e4ac559c4a24 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -77,6 +77,23 @@ extern void threshold_interrupt(void); extern void call_function_interrupt(void); extern void call_function_single_interrupt(void); +#ifdef CONFIG_TRACING +/* Interrupt handlers registered during init_IRQ */ +extern void trace_apic_timer_interrupt(void); +extern void trace_x86_platform_ipi(void); +extern void trace_error_interrupt(void); +extern void trace_irq_work_interrupt(void); +extern void trace_spurious_interrupt(void); +extern void trace_thermal_interrupt(void); +extern void trace_reschedule_interrupt(void); +extern void trace_threshold_interrupt(void); +extern void trace_call_function_interrupt(void); +extern void trace_call_function_single_interrupt(void); +#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt +#define trace_reboot_interrupt reboot_interrupt +#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi +#endif /* CONFIG_TRACING */ + /* IOAPIC */ #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) extern unsigned long io_apic_irqs; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index c2934be2446a..cd9c41938b8a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -12,6 +12,9 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; void hyperv_callback_vector(void); +#ifdef CONFIG_TRACING +#define trace_hyperv_callback_vector hyperv_callback_vector +#endif void hyperv_vector_handler(struct pt_regs *regs); void hv_register_vmbus_handler(int irq, irq_handler_t handler); diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h new file mode 100644 index 000000000000..2874df24e7a4 --- /dev/null +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -0,0 +1,104 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq_vectors + +#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_VECTORS_H + +#include + +extern void trace_irq_vector_regfunc(void); +extern void trace_irq_vector_unregfunc(void); + +DECLARE_EVENT_CLASS(x86_irq_vector, + + TP_PROTO(int vector), + + TP_ARGS(vector), + + TP_STRUCT__entry( + __field( int, vector ) + ), + + TP_fast_assign( + __entry->vector = vector; + ), + + TP_printk("vector=%d", __entry->vector) ); + +#define DEFINE_IRQ_VECTOR_EVENT(name) \ +DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ + TP_PROTO(int vector), \ + TP_ARGS(vector), \ + trace_irq_vector_regfunc, \ + trace_irq_vector_unregfunc); \ +DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ + TP_PROTO(int vector), \ + TP_ARGS(vector), \ + trace_irq_vector_regfunc, \ + trace_irq_vector_unregfunc); + + +/* + * local_timer - called when entering/exiting a local timer interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(local_timer); + +/* + * reschedule - called when entering/exiting a reschedule vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(reschedule); + +/* + * spurious_apic - called when entering/exiting a spurious apic vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(spurious_apic); + +/* + * error_apic - called when entering/exiting an error apic vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(error_apic); + +/* + * x86_platform_ipi - called when entering/exiting a x86 platform ipi interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi); + +/* + * irq_work - called when entering/exiting a irq work interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(irq_work); + +/* + * call_function - called when entering/exiting a call function interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(call_function); + +/* + * call_function_single - called when entering/exiting a call function + * single interrupt vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(call_function_single); + +/* + * threshold_apic - called when entering/exiting a threshold apic interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(threshold_apic); + +/* + * thermal_apic - called when entering/exiting a thermal apic interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(thermal_apic); + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE irq_vectors +#endif /* _TRACE_IRQ_VECTORS_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index a06983cdc125..0b46ef261c77 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -731,6 +731,9 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) } extern void uv_bau_message_intr1(void); +#ifdef CONFIG_TRACING +#define trace_uv_bau_message_intr1 uv_bau_message_intr1 +#endif extern void uv_bau_timeout_intr1(void); struct atomic_short { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7bd3bd310106..74b3891afb9b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o +obj-$(CONFIG_TRACING) += tracepoint.o ### # 64 bit specific files diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 0ae0323b1f9c..5274c3a07e75 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,6 +2,7 @@ # Makefile for local APIC drivers and for the IO-APIC code # +CFLAGS_apic.o := -I$(src)/../../include/asm/trace obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o obj-y += hw_nmi.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 59ee76fe1c53..61ced40e9c2c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -55,6 +55,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; @@ -931,6 +934,27 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) set_irq_regs(old_regs); } +void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + * + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ + entering_ack_irq(); + trace_local_timer_entry(LOCAL_TIMER_VECTOR); + local_apic_timer_interrupt(); + trace_local_timer_exit(LOCAL_TIMER_VECTOR); + exiting_irq(); + + set_irq_regs(old_regs); +} + int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; @@ -1931,6 +1955,15 @@ void smp_spurious_interrupt(struct pt_regs *regs) exiting_irq(); } +void smp_trace_spurious_interrupt(struct pt_regs *regs) +{ + entering_irq(); + trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR); + __smp_spurious_interrupt(); + trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR); + exiting_irq(); +} + /* * This interrupt should never happen with our APIC/SMP architecture */ @@ -1978,6 +2011,15 @@ void smp_error_interrupt(struct pt_regs *regs) exiting_irq(); } +void smp_trace_error_interrupt(struct pt_regs *regs) +{ + entering_irq(); + trace_error_apic_entry(ERROR_APIC_VECTOR); + __smp_error_interrupt(regs); + trace_error_apic_exit(ERROR_APIC_VECTOR); + exiting_irq(); +} + /** * connect_bsp_APIC - attach the APIC to the interrupt system */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8f6a0f909d6f..d9c8c0947ec2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1257,7 +1257,7 @@ void __cpuinit cpu_init(void) switch_to_new_gdt(cpu); loadsegment(fs, 0); - load_idt((const struct desc_ptr *)&idt_descr); + load_current_idt(); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); @@ -1334,7 +1334,7 @@ void __cpuinit cpu_init(void) if (cpu_has_vme || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - load_idt(&idt_descr); + load_current_idt(); switch_to_new_gdt(cpu); /* diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index f6b35f2a6a37..2f3a7995e56a 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -29,6 +29,7 @@ #include #include #include +#include /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) @@ -391,6 +392,15 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) exiting_ack_irq(); } +asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs) +{ + entering_irq(); + trace_thermal_apic_entry(THERMAL_APIC_VECTOR); + __smp_thermal_interrupt(); + trace_thermal_apic_exit(THERMAL_APIC_VECTOR); + exiting_ack_irq(); +} + /* Thermal monitoring depends on APIC, ACPI and clock modulation */ static int intel_thermal_supported(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 610cd98d6ef9..fe6b1c86645b 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -8,6 +8,7 @@ #include #include #include +#include static void default_threshold_interrupt(void) { @@ -29,3 +30,12 @@ asmlinkage void smp_threshold_interrupt(void) __smp_threshold_interrupt(); exiting_ack_irq(); } + +asmlinkage void smp_trace_threshold_interrupt(void) +{ + entering_irq(); + trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); + __smp_threshold_interrupt(); + trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); + exiting_ack_irq(); +} diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 8f3e2dec1df3..2cfbc3a3a2dd 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -801,7 +801,17 @@ ENTRY(name) \ CFI_ENDPROC; \ ENDPROC(name) -#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) + +#ifdef CONFIG_TRACING +#define TRACE_BUILD_INTERRUPT(name, nr) \ + BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name) +#else +#define TRACE_BUILD_INTERRUPT(name, nr) +#endif + +#define BUILD_INTERRUPT(name, nr) \ + BUILD_INTERRUPT3(name, nr, smp_##name); \ + TRACE_BUILD_INTERRUPT(name, nr) /* The include is where all of the SMP etc. interrupts come from */ #include diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 727208941030..11eef43f971f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1138,7 +1138,7 @@ END(common_interrupt) /* * APIC interrupts. */ -.macro apicinterrupt num sym do_sym +.macro apicinterrupt3 num sym do_sym ENTRY(\sym) INTR_FRAME ASM_CLAC @@ -1150,15 +1150,32 @@ ENTRY(\sym) END(\sym) .endm +#ifdef CONFIG_TRACING +#define trace(sym) trace_##sym +#define smp_trace(sym) smp_trace_##sym + +.macro trace_apicinterrupt num sym +apicinterrupt3 \num trace(\sym) smp_trace(\sym) +.endm +#else +.macro trace_apicinterrupt num sym do_sym +.endm +#endif + +.macro apicinterrupt num sym do_sym +apicinterrupt3 \num \sym \do_sym +trace_apicinterrupt \num \sym +.endm + #ifdef CONFIG_SMP -apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ +apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt -apicinterrupt REBOOT_VECTOR \ +apicinterrupt3 REBOOT_VECTOR \ reboot_interrupt smp_reboot_interrupt #endif #ifdef CONFIG_X86_UV -apicinterrupt UV_BAU_MESSAGE \ +apicinterrupt3 UV_BAU_MESSAGE \ uv_bau_message_intr1 uv_bau_message_interrupt #endif apicinterrupt LOCAL_TIMER_VECTOR \ @@ -1167,7 +1184,7 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi #ifdef CONFIG_HAVE_KVM -apicinterrupt POSTED_INTR_VECTOR \ +apicinterrupt3 POSTED_INTR_VECTOR \ kvm_posted_intr_ipi smp_kvm_posted_intr_ipi #endif @@ -1451,13 +1468,13 @@ ENTRY(xen_failsafe_callback) CFI_ENDPROC END(xen_failsafe_callback) -apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ xen_hvm_callback_vector xen_evtchn_do_upcall #endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) -apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 84fb779d5b74..5e4d8a8a5c40 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -521,6 +521,12 @@ ENTRY(idt_table) ENTRY(debug_idt_table) .skip IDT_ENTRIES * 16 +#ifdef CONFIG_TRACING + .align L1_CACHE_BYTES +ENTRY(trace_idt_table) + .skip IDT_ENTRIES * 16 +#endif + __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e3b8df1754cc..06af119743a6 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -17,6 +17,7 @@ #include #include #include +#include atomic_t irq_err_count; @@ -244,6 +245,18 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs) } #endif +void smp_trace_x86_platform_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + entering_ack_irq(); + trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); + __smp_x86_platform_ipi(); + trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); + exiting_irq(); + set_irq_regs(old_regs); +} + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 074d46fdbd1f..636a55e4a13c 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -8,6 +8,7 @@ #include #include #include +#include static inline void irq_work_entering_irq(void) { @@ -28,6 +29,15 @@ void smp_irq_work_interrupt(struct pt_regs *regs) exiting_irq(); } +void smp_trace_irq_work_interrupt(struct pt_regs *regs) +{ + irq_work_entering_irq(); + trace_irq_work_entry(IRQ_WORK_VECTOR); + __smp_irq_work_interrupt(); + trace_irq_work_exit(IRQ_WORK_VECTOR); + exiting_irq(); +} + void arch_irq_work_raise(void) { #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d85837574a79..f4fe0b8879e0 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * Some notes on x86 processor bugs affecting SMP operation: * @@ -264,6 +265,17 @@ void smp_reschedule_interrupt(struct pt_regs *regs) */ } +void smp_trace_reschedule_interrupt(struct pt_regs *regs) +{ + ack_APIC_irq(); + trace_reschedule_entry(RESCHEDULE_VECTOR); + __smp_reschedule_interrupt(); + trace_reschedule_exit(RESCHEDULE_VECTOR); + /* + * KVM uses this interrupt to force a cpu out of guest mode + */ +} + static inline void call_function_entering_irq(void) { ack_APIC_irq(); @@ -283,6 +295,15 @@ void smp_call_function_interrupt(struct pt_regs *regs) exiting_irq(); } +void smp_trace_call_function_interrupt(struct pt_regs *regs) +{ + call_function_entering_irq(); + trace_call_function_entry(CALL_FUNCTION_VECTOR); + __smp_call_function_interrupt(); + trace_call_function_exit(CALL_FUNCTION_VECTOR); + exiting_irq(); +} + static inline void __smp_call_function_single_interrupt(void) { generic_smp_call_function_single_interrupt(); @@ -296,6 +317,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) exiting_irq(); } +void smp_trace_call_function_single_interrupt(struct pt_regs *regs) +{ + call_function_entering_irq(); + trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); + __smp_call_function_single_interrupt(); + trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); + exiting_irq(); +} + static int __init nonmi_ipi_setup(char *str) { smp_no_nmi_ipi = true; diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c new file mode 100644 index 000000000000..1423efe98fbc --- /dev/null +++ b/arch/x86/kernel/tracepoint.c @@ -0,0 +1,57 @@ +/* + * Code for supporting irq vector tracepoints. + * + * Copyright (C) 2013 Seiji Aguchi + * + */ +#include +#include +#include + +atomic_t trace_idt_ctr = ATOMIC_INIT(0); +struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, + (unsigned long) trace_idt_table }; + +#ifndef CONFIG_X86_64 +gate_desc trace_idt_table[NR_VECTORS] __page_aligned_data + = { { { { 0, 0 } } }, }; +#endif + +static int trace_irq_vector_refcount; +static DEFINE_MUTEX(irq_vector_mutex); + +static void set_trace_idt_ctr(int val) +{ + atomic_set(&trace_idt_ctr, val); + /* Ensure the trace_idt_ctr is set before sending IPI */ + wmb(); +} + +static void switch_idt(void *arg) +{ + load_current_idt(); +} + +void trace_irq_vector_regfunc(void) +{ + mutex_lock(&irq_vector_mutex); + if (!trace_irq_vector_refcount) { + set_trace_idt_ctr(1); + smp_call_function(switch_idt, NULL, 0); + switch_idt(NULL); + } + trace_irq_vector_refcount++; + mutex_unlock(&irq_vector_mutex); +} + +void trace_irq_vector_unregfunc(void) +{ + mutex_lock(&irq_vector_mutex); + trace_irq_vector_refcount--; + if (!trace_irq_vector_refcount) { + set_trace_idt_ctr(0); + smp_call_function(switch_idt, NULL, 0); + switch_idt(NULL); + } + mutex_unlock(&irq_vector_mutex); +} diff --git a/include/xen/events.h b/include/xen/events.h index b2b27c6a0f7b..c9ea10ee2273 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -76,6 +76,9 @@ unsigned irq_from_evtchn(unsigned int evtchn); /* Xen HVM evtchn vector callback */ void xen_hvm_callback_vector(void); +#ifdef CONFIG_TRACING +#define trace_xen_hvm_callback_vector xen_hvm_callback_vector +#endif extern int xen_have_vector_callback; int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); From 83ab85140bc1492f92de263a1c30ea04a0f465f7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 21 Jun 2013 10:29:05 -0400 Subject: [PATCH 5/7] trace,x86: Move creation of irq tracepoints from apic.c to irq.c Compiling without CONFIG_X86_LOCAL_APIC set, apic.c will not be compiled, and the irq tracepoints will not be created via the CREATE_TRACE_POINTS macro. When CONFIG_X86_LOCAL_APIC is not set, we get the following build error: LD init/built-in.o arch/x86/built-in.o: In function `trace_x86_platform_ipi_entry': linux-test.git/arch/x86/include/asm/trace/irq_vectors.h:66: undefined reference to `__tracepoint_x86_platform_ipi_entry' arch/x86/built-in.o: In function `trace_x86_platform_ipi_exit': linux-test.git/arch/x86/include/asm/trace/irq_vectors.h:66: undefined reference to `__tracepoint_x86_platform_ipi_exit' arch/x86/built-in.o: In function `trace_irq_work_entry': linux-test.git/arch/x86/include/asm/trace/irq_vectors.h:72: undefined reference to `__tracepoint_irq_work_entry' arch/x86/built-in.o: In function `trace_irq_work_exit': linux-test.git/arch/x86/include/asm/trace/irq_vectors.h:72: undefined reference to `__tracepoint_irq_work_exit' arch/x86/built-in.o:(__jump_table+0x8): undefined reference to `__tracepoint_x86_platform_ipi_entry' arch/x86/built-in.o:(__jump_table+0x14): undefined reference to `__tracepoint_x86_platform_ipi_exit' arch/x86/built-in.o:(__jump_table+0x20): undefined reference to `__tracepoint_irq_work_entry' arch/x86/built-in.o:(__jump_table+0x2c): undefined reference to `__tracepoint_irq_work_exit' make[1]: *** [vmlinux] Error 1 make: *** [sub-make] Error 2 As irq.c is always compiled for x86, it is a more appropriate location to create the irq tracepoints. Cc: Seiji Aguchi Signed-off-by: Steven Rostedt --- arch/x86/kernel/Makefile | 2 ++ arch/x86/kernel/apic/Makefile | 1 - arch/x86/kernel/apic/apic.c | 4 +--- arch/x86/kernel/irq.c | 2 ++ 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 74b3891afb9b..44e763f94ad4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -16,6 +16,8 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif +CFLAGS_irq.o := -I$(src)/../include/asm/trace + obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 5274c3a07e75..0ae0323b1f9c 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,6 @@ # Makefile for local APIC drivers and for the IO-APIC code # -CFLAGS_apic.o := -I$(src)/../../include/asm/trace obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o obj-y += hw_nmi.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 61ced40e9c2c..961676c9b8d8 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -55,9 +56,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 06af119743a6..3a8185c042a2 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -17,6 +17,8 @@ #include #include #include + +#define CREATE_TRACE_POINTS #include atomic_t irq_err_count; From 2b4bc78956bdcc2bb4c49b3af955be776817e897 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Sat, 22 Jun 2013 13:16:19 -0400 Subject: [PATCH 6/7] trace,x86: Do not call local_irq_save() in load_current_idt() As load_current_idt() is now what is used to update the IDT for the switches needed for NMI, lockdep debug, and for tracing, it must not call local_irq_save(). This is because one of the users of this is lockdep, which does tracing of local_irq_save() and when the debug trap is hit, we need to update the IDT before tracing interrupts being disabled. As load_current_idt() is used to do this, calling local_irq_save() which lockdep traces, defeats the point of calling load_current_idt(). As interrupts are already disabled when used by lockdep and NMI, the only other user is tracing that can disable interrupts itself. Simply have the tracing update disable interrupts before calling load_current_idt() instead of breaking the other users. Here's the dump that happened: ------------[ cut here ]------------ WARNING: at /work/autotest/nobackup/linux-test.git/kernel/fork.c:1196 copy_process+0x2c3/0x1398() DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled) Modules linked in: CPU: 1 PID: 4570 Comm: gdm-simple-gree Not tainted 3.10.0-rc3-test+ #5 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 ffffffff81d2a7a5 ffff88006ed13d50 ffffffff8192822b ffff88006ed13d90 ffffffff81035f25 ffff8800721c6000 ffff88006ed13da0 0000000001200011 0000000000000000 ffff88006ed5e000 ffff8800721c6000 ffff88006ed13df0 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x67/0x80 [] warn_slowpath_fmt+0x46/0x48 [] ? __raw_spin_lock_init+0x31/0x52 [] copy_process+0x2c3/0x1398 [] do_fork+0xa8/0x260 [] ? trace_preempt_on+0x2a/0x2f [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] ? sysret_check+0x1b/0x56 [] ? sysret_check+0x1b/0x56 [] SyS_clone+0x16/0x18 [] stub_clone+0x69/0x90 [] ? system_call_fastpath+0x16/0x1b ---[ end trace 8b157a9d20ca1aa2 ]--- in fork.c: #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); <-- bug here DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif Cc: Seiji Aguchi Signed-off-by: Steven Rostedt --- arch/x86/include/asm/desc.h | 10 ++++------ arch/x86/kernel/tracepoint.c | 4 ++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 1377ecb29d8d..b90e5dfeee46 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -497,21 +497,19 @@ static inline void load_trace_idt(void) #endif /* - * the load_current_idt() is called with interrupt disabled by local_irq_save() + * The load_current_idt() must be called with interrupts disabled * to avoid races. That way the IDT will always be set back to the expected - * descriptor. + * descriptor. It's also called when a CPU is being initialized, and + * that doesn't need to disable interrupts, as nothing should be + * bothering the CPU then. */ static inline void load_current_idt(void) { - unsigned long flags; - - local_irq_save(flags); if (is_debug_idt_enabled()) load_debug_idt(); else if (is_trace_idt_enabled()) load_trace_idt(); else load_idt((const struct desc_ptr *)&idt_descr); - local_irq_restore(flags); } #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 1423efe98fbc..4e584a8d6edd 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -29,7 +29,11 @@ static void set_trace_idt_ctr(int val) static void switch_idt(void *arg) { + unsigned long flags; + + local_irq_save(flags); load_current_idt(); + local_irq_restore(flags); } void trace_irq_vector_regfunc(void) From 33e5ff634f07dec26b7ed1fd7f9e32978fe1f2b2 Mon Sep 17 00:00:00 2001 From: Seiji Aguchi Date: Sat, 22 Jun 2013 07:33:30 -0400 Subject: [PATCH 7/7] x86/tracing: Add config option checking to the definitions of mce handlers In case CONFIG_X86_MCE_THRESHOLD and CONFIG_X86_THERMAL_VECTOR are disabled, kernel build fails as follows. arch/x86/built-in.o: In function `trace_threshold_interrupt': (.entry.text+0x122b): undefined reference to `smp_trace_threshold_interrupt' arch/x86/built-in.o: In function `trace_thermal_interrupt': (.entry.text+0x132b): undefined reference to `smp_trace_thermal_interrupt' In this case, trace_threshold_interrupt/trace_thermal_interrupt are not needed to define. So, add config option checking to their definitions in entry_64.S. Signed-off-by: Seiji Aguchi Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/51C58B8A.2080808@hds.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 11eef43f971f..53d639831a6c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1188,10 +1188,15 @@ apicinterrupt3 POSTED_INTR_VECTOR \ kvm_posted_intr_ipi smp_kvm_posted_intr_ipi #endif +#ifdef CONFIG_X86_MCE_THRESHOLD apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt +#endif + +#ifdef CONFIG_X86_THERMAL_VECTOR apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt +#endif #ifdef CONFIG_SMP apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \