From f3bea49115b21e0995abf41402ad2f4d9c69eda4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Wed, 2 Jul 2014 23:23:31 -0400
Subject: [PATCH 01/55] ftrace/x86: Add dynamic allocated trampoline for ftrace_ops

The current method of handling multiple function callbacks is to register
a list function callback that calls all the other callbacks based on their
hash tables, comparing each hash to the function that the callback was
called on. But this is very inefficient.

For example, if you are tracing all functions in the kernel and then add a
kprobe to a function such that the kprobe uses ftrace, the mcount
trampoline will switch from calling the function trace callback to calling
the list callback that will iterate over all registered ftrace_ops (in
this case, the function tracer and the kprobes callback). That means for
every function being traced it checks the hash of the ftrace_ops for
function tracing and kprobes, even though the kprobe is only set on a
single function. The kprobes ftrace_ops is checked for every function
being traced!

Instead of calling the list function for functions that are only being
traced by a single callback, we can call a dynamically allocated
trampoline that calls the callback directly. The function graph tracer
already uses a direct call trampoline when it is being traced by itself,
but it is not dynamically allocated. Its trampoline is static in the
kernel core. The infrastructure that called the function graph trampoline
can also be used to call a dynamically allocated one.

For now, only ftrace_ops that are not dynamically allocated can have a
trampoline. That is, users such as the function tracer or the stack
tracer. kprobes and perf allocate their ftrace_ops, and until there's a
safe way to free the trampoline, it cannot be used. The dynamically
allocated ftrace_ops may, however, use the trampoline if the kernel is not
compiled with CONFIG_PREEMPT. But that will come later.
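To make the cost difference concrete, here is a minimal userspace sketch of
the two call paths (illustrative only, not kernel code; struct ops and the
names in it are invented stand-ins for ftrace_ops and its filter hash):

#include <stdio.h>

typedef void (*ftrace_func_t)(unsigned long ip, void *data);

struct ops {
	ftrace_func_t func;
	unsigned long filter_ip;	/* stand-in for the real filter hash */
};

/* What the shared list callback must do on every traced function call */
static void list_func(unsigned long ip, struct ops **list, int nr_ops)
{
	int i;

	/* every registered ops' filter is checked, every time */
	for (i = 0; i < nr_ops; i++)
		if (!list[i]->filter_ip || list[i]->filter_ip == ip)
			list[i]->func(ip, list[i]);
}

/* What a per-ops trampoline reduces the call to */
static void direct_call(unsigned long ip, struct ops *ops)
{
	ops->func(ip, ops);	/* no loop, no filter checks */
}

static void my_callback(unsigned long ip, void *data)
{
	printf("traced function at %#lx\n", ip);
}

int main(void)
{
	struct ops tracer = { my_callback, 0 };
	struct ops *list[] = { &tracer };

	list_func(0x1000, list, 1);
	direct_call(0x1000, &tracer);
	return 0;
}

The real trampoline achieves the direct call in machine code:
create_trampoline() in the diff below copies the ftrace_caller assembly and
patches the address of the owning ftrace_ops into the copy, so the callback
is reached without any iteration.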
Tested-by: Masami Hiramatsu Tested-by: Jiri Kosina Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 195 ++++++++++++++++++++++++++++++++++-- arch/x86/kernel/mcount_64.S | 25 ++++- include/linux/ftrace.h | 8 ++ kernel/trace/ftrace.c | 40 +++++++- 4 files changed, 254 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3386dc9aa333..e4d48f6cad86 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -644,13 +645,8 @@ int __init ftrace_dyn_arch_init(void) { return 0; } -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -#ifdef CONFIG_DYNAMIC_FTRACE -extern void ftrace_graph_call(void); +#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER) static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -664,6 +660,193 @@ static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) */ return calc.code; } +#endif + +/* Currently only x86_64 supports dynamic trampolines */ +#ifdef CONFIG_X86_64 + +#ifdef CONFIG_MODULES +#include +/* Module allocation simplifies allocating memory for code */ +static inline void *alloc_tramp(unsigned long size) +{ + return module_alloc(size); +} +static inline void tramp_free(void *tramp) +{ + module_free(NULL, tramp); +} +#else +/* Trampolines can only be created if modules are supported */ +static inline void *alloc_tramp(unsigned long size) +{ + return NULL; +} +static inline void tramp_free(void *tramp) { } +#endif + +/* Defined as markers to the end of the ftrace default trampolines */ +extern void ftrace_caller_end(void); +extern void ftrace_regs_caller_end(void); +extern void ftrace_return(void); +extern void ftrace_caller_op_ptr(void); +extern void ftrace_regs_caller_op_ptr(void); + +/* movq function_trace_op(%rip), %rdx */ +/* 0x48 0x8b 0x15 */ +#define OP_REF_SIZE 7 + +/* + * The ftrace_ops is passed to the function callback. Since the + * trampoline only services a single ftrace_ops, we can pass in + * that ops directly. + * + * The ftrace_op_code_union is used to create a pointer to the + * ftrace_ops that will be passed to the callback function. + */ +union ftrace_op_code_union { + char code[OP_REF_SIZE]; + struct { + char op[3]; + int offset; + } __attribute__((packed)); +}; + +static unsigned long create_trampoline(struct ftrace_ops *ops) +{ + unsigned const char *jmp; + unsigned long start_offset; + unsigned long end_offset; + unsigned long op_offset; + unsigned long offset; + unsigned long size; + unsigned long ip; + unsigned long *ptr; + void *trampoline; + /* 48 8b 15 is movq (%rip), %rdx */ + unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; + union ftrace_op_code_union op_ptr; + int ret; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + start_offset = (unsigned long)ftrace_regs_caller; + end_offset = (unsigned long)ftrace_regs_caller_end; + op_offset = (unsigned long)ftrace_regs_caller_op_ptr; + } else { + start_offset = (unsigned long)ftrace_caller; + end_offset = (unsigned long)ftrace_caller_end; + op_offset = (unsigned long)ftrace_caller_op_ptr; + } + + size = end_offset - start_offset; + + /* + * Allocate enough size to store the ftrace_caller code, + * the jmp to ftrace_return, as well as the address of + * the ftrace_ops this trampoline is used for. 
+ */ + trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *)); + if (!trampoline) + return 0; + + /* Copy ftrace_caller onto the trampoline memory */ + ret = probe_kernel_read(trampoline, (void *)start_offset, size); + if (WARN_ON(ret < 0)) { + tramp_free(trampoline); + return 0; + } + + ip = (unsigned long)trampoline + size; + + /* The trampoline ends with a jmp to ftrace_return */ + jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return); + memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE); + + /* + * The address of the ftrace_ops that is used for this trampoline + * is stored at the end of the trampoline. This will be used to + * load the third parameter for the callback. Basically, that + * location at the end of the trampoline takes the place of + * the global function_trace_op variable. + */ + + ptr = (unsigned long *)(trampoline + size + MCOUNT_INSN_SIZE); + *ptr = (unsigned long)ops; + + op_offset -= start_offset; + memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE); + + /* Are we pointing to the reference? */ + if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) { + tramp_free(trampoline); + return 0; + } + + /* Load the contents of ptr into the callback parameter */ + offset = (unsigned long)ptr; + offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE; + + op_ptr.offset = offset; + + /* put in the new offset to the ftrace_ops */ + memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + + /* ALLOC_TRAMP flags lets us know we created it */ + ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; + + return (unsigned long)trampoline; +} + +void arch_ftrace_update_trampoline(struct ftrace_ops *ops) +{ + ftrace_func_t func; + unsigned char *new; + unsigned long start_offset; + unsigned long call_offset; + unsigned long offset; + unsigned long ip; + int ret; + + if (ops->trampoline) { + /* + * The ftrace_ops caller may set up its own trampoline. + * In such a case, this code must not modify it. 
+ */ + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + return; + } else { + ops->trampoline = create_trampoline(ops); + if (!ops->trampoline) + return; + } + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + start_offset = (unsigned long)ftrace_regs_caller; + call_offset = (unsigned long)ftrace_regs_call; + } else { + start_offset = (unsigned long)ftrace_caller; + call_offset = (unsigned long)ftrace_call; + } + + offset = call_offset - start_offset; + ip = ops->trampoline + offset; + + func = ftrace_ops_get_func(ops); + + /* Do a safe modify in case the trampoline is executing */ + new = ftrace_call_replace(ip, (unsigned long)func); + ret = update_ftrace_func(ip, new); + + /* The update should never fail */ + WARN_ON(ret); +} +#endif /* CONFIG_X86_64 */ +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_graph_call(void); static int ftrace_mod_jmp(unsigned long ip, void *func) { diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index c73aecf10d34..42f0cdd20baf 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -28,9 +28,11 @@ ENTRY(function_hook) END(function_hook) /* skip is set if stack has been adjusted */ -.macro ftrace_caller_setup skip=0 +.macro ftrace_caller_setup trace_label skip=0 MCOUNT_SAVE_FRAME \skip + /* Save this location */ +GLOBAL(\trace_label) /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -46,7 +48,7 @@ END(function_hook) .endm ENTRY(ftrace_caller) - ftrace_caller_setup + ftrace_caller_setup ftrace_caller_op_ptr /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx @@ -54,7 +56,14 @@ GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME -ftrace_return: + + /* + * The copied trampoline must call ftrace_return as it + * still may need to call the function graph tracer. + */ +GLOBAL(ftrace_caller_end) + +GLOBAL(ftrace_return) #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -70,7 +79,7 @@ ENTRY(ftrace_regs_caller) pushfq /* skip=8 to skip flags saved in SS */ - ftrace_caller_setup 8 + ftrace_caller_setup ftrace_regs_caller_op_ptr 8 /* Save the rest of pt_regs */ movq %r15, R15(%rsp) @@ -122,6 +131,14 @@ GLOBAL(ftrace_regs_call) /* Restore flags */ popfq + /* + * As this jmp to ftrace_return can be a short jump + * it must not be copied into the trampoline. + * The trampoline will add the code to jump + * to the return. + */ +GLOBAL(ftrace_regs_caller_end) + jmp ftrace_return popfq diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 662697babd48..06e3ca5a5083 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -94,6 +94,13 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ADDING - The ops is in the process of being added. * REMOVING - The ops is in the process of being removed. * MODIFYING - The ops is in the process of changing its filter functions. + * ALLOC_TRAMP - A dynamic trampoline was allocated by the core code. + * The arch specific code sets this flag when it allocated a + * trampoline. This lets the arch know that it can update the + * trampoline in case the callback function changes. + * The ftrace_ops trampoline can be set by the ftrace users, and + * in such cases the arch must not modify it. Only the arch ftrace + * core code should set this flag. 
 */
 enum {
	FTRACE_OPS_FL_ENABLED		= 1 << 0,
@@ -108,6 +115,7 @@ enum {
 	FTRACE_OPS_FL_ADDING		= 1 << 9,
 	FTRACE_OPS_FL_REMOVING		= 1 << 10,
 	FTRACE_OPS_FL_MODIFYING		= 1 << 11,
+	FTRACE_OPS_FL_ALLOC_TRAMP	= 1 << 12,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 31c90fec4158..15f85eac7e95 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -387,6 +387,8 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
 	return ret;
 }
 
+static void ftrace_update_trampoline(struct ftrace_ops *ops);
+
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
 	if (ops->flags & FTRACE_OPS_FL_DELETED)
@@ -419,6 +421,8 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	} else
 		add_ftrace_ops(&ftrace_ops_list, ops);
 
+	ftrace_update_trampoline(ops);
+
 	if (ftrace_enabled)
 		update_ftrace_function();
 
@@ -3020,9 +3024,6 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
 {
 	struct ftrace_iterator *iter;
 
-	if (unlikely(ftrace_disabled))
-		return -ENODEV;
-
 	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
 	if (iter) {
 		iter->pg = ftrace_pages_start;
@@ -3975,6 +3976,9 @@ static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
 static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
 static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);
 
+static unsigned long save_global_trampoline;
+static unsigned long save_global_flags;
+
 static int __init set_graph_function(char *str)
 {
 	strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -4696,6 +4700,20 @@ void __init ftrace_init(void)
 	ftrace_disabled = 1;
 }
 
+/* Do nothing if arch does not support this */
+void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
+{
+}
+
+static void ftrace_update_trampoline(struct ftrace_ops *ops)
+{
+	/* Currently, only non dynamic ops can have a trampoline */
+	if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
+		return;
+
+	arch_ftrace_update_trampoline(ops);
+}
+
 #else
 
 static struct ftrace_ops global_ops = {
@@ -4738,6 +4756,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 	return 1;
 }
 
+static void ftrace_update_trampoline(struct ftrace_ops *ops)
+{
+}
+
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 __init void ftrace_init_global_array_ops(struct trace_array *tr)
@@ -5522,7 +5544,6 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 	update_function_graph_func();
 
 	ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
-
 out:
 	mutex_unlock(&ftrace_lock);
 	return ret;
@@ -5543,6 +5564,17 @@ void unregister_ftrace_graph(void)
 	unregister_pm_notifier(&ftrace_suspend_notifier);
 	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/*
+	 * Function graph does not allocate the trampoline, but
+	 * other global_ops do. We need to reset the ALLOC_TRAMP flag
+	 * if one was used.
+	 */
+	global_ops.trampoline = save_global_trampoline;
+	if (save_global_flags & FTRACE_OPS_FL_ALLOC_TRAMP)
+		global_ops.flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
+#endif
+
 out:
 	mutex_unlock(&ftrace_lock);
 }

From 15d5b02cc575e5b20ddfa1645fc1242f0b0ba1c8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Thu, 3 Jul 2014 14:51:36 -0400
Subject: [PATCH 02/55] ftrace/x86: Show trampoline call function in enabled_functions

The file /sys/kernel/debug/tracing/enabled_functions is used to debug
ftrace function hooks. Add to the output what function is being called
by the trampoline if the arch supports it. Add support for this feature
in x86_64.

Cc: H.
Peter Anvin Tested-by: Masami Hiramatsu Tested-by: Jiri Kosina Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 98 +++++++++++++++++++++++++++++++++++----- kernel/trace/ftrace.c | 22 ++++++++- 2 files changed, 106 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e4d48f6cad86..ca17c20a1010 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -48,7 +48,7 @@ int ftrace_arch_code_modify_post_process(void) union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; struct { - char e8; + unsigned char e8; int offset; } __attribute__((packed)); }; @@ -797,12 +797,26 @@ static unsigned long create_trampoline(struct ftrace_ops *ops) return (unsigned long)trampoline; } +static unsigned long calc_trampoline_call_offset(bool save_regs) +{ + unsigned long start_offset; + unsigned long call_offset; + + if (save_regs) { + start_offset = (unsigned long)ftrace_regs_caller; + call_offset = (unsigned long)ftrace_regs_call; + } else { + start_offset = (unsigned long)ftrace_caller; + call_offset = (unsigned long)ftrace_call; + } + + return call_offset - start_offset; +} + void arch_ftrace_update_trampoline(struct ftrace_ops *ops) { ftrace_func_t func; unsigned char *new; - unsigned long start_offset; - unsigned long call_offset; unsigned long offset; unsigned long ip; int ret; @@ -820,15 +834,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) return; } - if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { - start_offset = (unsigned long)ftrace_regs_caller; - call_offset = (unsigned long)ftrace_regs_call; - } else { - start_offset = (unsigned long)ftrace_caller; - call_offset = (unsigned long)ftrace_call; - } - - offset = call_offset - start_offset; + offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); ip = ops->trampoline + offset; func = ftrace_ops_get_func(ops); @@ -840,6 +846,74 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) /* The update should never fail */ WARN_ON(ret); } + +/* Return the address of the function the trampoline calls */ +static void *addr_from_call(void *ptr) +{ + union ftrace_code_union calc; + int ret; + + ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE); + if (WARN_ON_ONCE(ret < 0)) + return NULL; + + /* Make sure this is a call */ + if (WARN_ON_ONCE(calc.e8 != 0xe8)) { + pr_warn("Expected e8, got %x\n", calc.e8); + return NULL; + } + + return ptr + MCOUNT_INSN_SIZE + calc.offset; +} + +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer); + +/* + * If the ops->trampoline was not allocated, then it probably + * has a static trampoline func, or is the ftrace caller itself. + */ +static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + unsigned long offset; + bool save_regs = rec->flags & FTRACE_FL_REGS_EN; + void *ptr; + + if (ops && ops->trampoline) { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* + * We only know about function graph tracer setting as static + * trampoline. 
+	 */
+		if (ops->trampoline == FTRACE_GRAPH_ADDR)
+			return (void *)prepare_ftrace_return;
+#endif
+		return NULL;
+	}
+
+	offset = calc_trampoline_call_offset(save_regs);
+
+	if (save_regs)
+		ptr = (void *)FTRACE_REGS_ADDR + offset;
+	else
+		ptr = (void *)FTRACE_ADDR + offset;
+
+	return addr_from_call(ptr);
+}
+
+void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+	unsigned long offset;
+
+	/* If we didn't allocate this trampoline, consider it static */
+	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+		return static_tramp_func(ops, rec);
+
+	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
+	return addr_from_call((void *)ops->trampoline + offset);
+}
+
+
 #endif /* CONFIG_X86_64 */
 #endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 15f85eac7e95..422e1f8300b1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2952,6 +2952,22 @@ static void t_stop(struct seq_file *m, void *p)
 	mutex_unlock(&ftrace_lock);
 }
 
+void * __weak
+arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+	return NULL;
+}
+
+static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops,
+				struct dyn_ftrace *rec)
+{
+	void *ptr;
+
+	ptr = arch_ftrace_trampoline_func(ops, rec);
+	if (ptr)
+		seq_printf(m, " ->%pS", ptr);
+}
+
 static int t_show(struct seq_file *m, void *v)
 {
 	struct ftrace_iterator *iter = m->private;
@@ -2975,19 +2991,21 @@ static int t_show(struct seq_file *m, void *v)
 	seq_printf(m, "%ps", (void *)rec->ip);
 	if (iter->flags & FTRACE_ITER_ENABLED) {
+		struct ftrace_ops *ops = NULL;
+
 		seq_printf(m, " (%ld)%s",
 			   ftrace_rec_count(rec),
 			   rec->flags & FTRACE_FL_REGS ? " R" : " ");
 		if (rec->flags & FTRACE_FL_TRAMP_EN) {
-			struct ftrace_ops *ops;
-
 			ops = ftrace_find_tramp_ops_any(rec);
 			if (ops)
 				seq_printf(m, "\ttramp: %pS",
					   (void *)ops->trampoline);
 			else
 				seq_printf(m, "\ttramp: ERROR!");
+
 		}
+		add_trampoline_func(m, ops, rec);
 	}
 
 	seq_printf(m, "\n");

From 12cce594fa8f12e002e7eb5d10141853c1e6a112 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Thu, 3 Jul 2014 15:48:16 -0400
Subject: [PATCH 03/55] ftrace/x86: Allow !CONFIG_PREEMPT dynamic ops to use allocated trampolines

When a static ftrace_ops (like the function tracer's) enables tracing
and is the only callback referencing a function, a trampoline that calls
the callback directly is dynamically allocated for that function,
instead of a loop function that iterates over all the registered
ftrace_ops (when more than one ops is registered).

But when it comes to dynamically allocated ftrace_ops, which may be
freed, on a CONFIG_PREEMPT kernel there's no way to know when it is safe
to free the trampoline. If a task was preempted while executing on the
trampoline, there's currently no way to know when it will be off that
trampoline.

But this is not true when it comes to !CONFIG_PREEMPT. The current
method of calling schedule_on_each_cpu() will force tasks off the
trampoline, because they cannot schedule while on it (kernel preemption
is not configured). That means it is safe to free a dynamically
allocated ftrace_ops trampoline when CONFIG_PREEMPT is not configured.

Cc: H. Peter Anvin
Cc: Paul E. McKenney
Acked-by: Borislav Petkov
Tested-by: Masami Hiramatsu
Tested-by: Jiri Kosina
Signed-off-by: Steven Rostedt
---
 arch/x86/kernel/ftrace.c | 8 ++++++++
 kernel/trace/ftrace.c    | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ca17c20a1010..4cfeca6ffe11 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -913,6 +913,14 @@ void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
 	return addr_from_call((void *)ops->trampoline + offset);
 }
 
+void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
+{
+	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+		return;
+
+	tramp_free((void *)ops->trampoline);
+	ops->trampoline = 0;
+}
 #endif /* CONFIG_X86_64 */
 #endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 422e1f8300b1..eab3123a1fbe 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2324,6 +2324,10 @@ static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
 static ftrace_func_t saved_ftrace_func;
 static int ftrace_start_up;
 
+void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops)
+{
+}
+
 static void control_ops_free(struct ftrace_ops *ops)
 {
 	free_percpu(ops->disabled);
@@ -2475,6 +2479,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) {
 		schedule_on_each_cpu(ftrace_sync);
 
+		arch_ftrace_trampoline_free(ops);
+
 		if (ops->flags & FTRACE_OPS_FL_CONTROL)
 			control_ops_free(ops);
 	}
@@ -4725,9 +4731,21 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 
 static void ftrace_update_trampoline(struct ftrace_ops *ops)
 {
+
+/*
+ * Currently there's no safe way to free a trampoline when the kernel
+ * is configured with PREEMPT. That is because a task could be preempted
+ * when it jumped to the trampoline, it may be preempted for a long time
+ * depending on the system load, and currently there's no way to know
+ * when it will be off the trampoline. If the trampoline is freed
+ * too early, when the task runs again, it will be executing on freed
+ * memory and crash.
+ */
+#ifdef CONFIG_PREEMPT
 	/* Currently, only non dynamic ops can have a trampoline */
 	if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
 		return;
+#endif
 
 	arch_ftrace_update_trampoline(ops);
 }

From 4fd3279b48605ae3ea509b9b2c02e46aa0975930 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Fri, 24 Oct 2014 17:56:04 -0400
Subject: [PATCH 04/55] ftrace: Add more information to ftrace_bug() output

With the introduction of dynamic trampolines, it is useful for
ftrace_bug() to produce more information about the current state when
things go wrong. This can help debug issues that may arise.

Ftrace has lots of checks to make sure that the state of the system it
touches is exactly what it expects it to be. When it detects an
abnormality it calls ftrace_bug() and disables itself to prevent any
further damage. It is crucial that ftrace_bug() produces sufficient
information that can be used to debug the situation.
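The kind of report this enables is easiest to see in a standalone sketch (a
userspace approximation only; report_patch_failure() is an invented name,
and the error meanings follow the ftrace_bug() kerneldoc updated below):

#include <stdio.h>
#include <errno.h>

static void report_patch_failure(int failed, unsigned long ip,
				 unsigned long expected_tramp)
{
	switch (failed) {
	case -EFAULT:
		printf("ftrace faulted on modifying %#lx\n", ip);
		break;
	case -EINVAL:
		printf("ftrace failed to modify %#lx: unexpected bytes\n", ip);
		break;
	case -EPERM:
		printf("ftrace faulted on writing %#lx\n", ip);
		break;
	default:
		printf("ftrace faulted on unknown error at %#lx\n", ip);
		break;
	}
	/* the extra detail this patch adds: what ftrace expected to find */
	printf(" expected tramp: %#lx\n", expected_tramp);
}

int main(void)
{
	report_patch_failure(-EINVAL, 0xffffffff810001c8UL, 0xffffffffa0000000UL);
	return 0;
}

Printing the record flags and the expected trampoline next to the errno is
what makes a failed modification attributable to a specific ftrace_ops
rather than just to an address.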
Cc: Benjamin Herrenschmidt Acked-by: Borislav Petkov Tested-by: Masami Hiramatsu Tested-by: Jiri Kosina Signed-off-by: Steven Rostedt --- arch/powerpc/kernel/ftrace.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- include/linux/ftrace.h | 4 +++- kernel/trace/ftrace.c | 38 +++++++++++++++++++++++++++--------- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 390311c0f03d..e66af6d265e8 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -449,7 +449,7 @@ void ftrace_replace_code(int enable) rec = ftrace_rec_iter_record(iter); ret = __ftrace_replace_code(rec, enable); if (ret) { - ftrace_bug(ret, rec->ip); + ftrace_bug(ret, rec); return; } } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4cfeca6ffe11..1aea94d336c7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -583,7 +583,7 @@ void ftrace_replace_code(int enable) remove_breakpoints: pr_warn("Failed on %s (%d):\n", report, count); - ftrace_bug(ret, rec ? rec->ip : 0); + ftrace_bug(ret, rec); for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 06e3ca5a5083..619e37cc17fd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -263,7 +263,9 @@ struct ftrace_func_command { int ftrace_arch_code_modify_prepare(void); int ftrace_arch_code_modify_post_process(void); -void ftrace_bug(int err, unsigned long ip); +struct dyn_ftrace; + +void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eab3123a1fbe..4043332f6720 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1738,10 +1738,13 @@ static void print_ip_ins(const char *fmt, unsigned char *p) printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); } +static struct ftrace_ops * +ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); + /** * ftrace_bug - report and shutdown function tracer * @failed: The failed type (EFAULT, EINVAL, EPERM) - * @ip: The address that failed + * @rec: The record that failed * * The arch code that enables or disables the function tracing * can call ftrace_bug() when it has detected a problem in @@ -1750,8 +1753,10 @@ static void print_ip_ins(const char *fmt, unsigned char *p) * EINVAL - if what is read at @ip is not what was expected * EPERM - if the problem happens on writting to the @ip address */ -void ftrace_bug(int failed, unsigned long ip) +void ftrace_bug(int failed, struct dyn_ftrace *rec) { + unsigned long ip = rec ? rec->ip : 0; + switch (failed) { case -EFAULT: FTRACE_WARN_ON_ONCE(1); @@ -1763,7 +1768,7 @@ void ftrace_bug(int failed, unsigned long ip) pr_info("ftrace failed to modify "); print_ip_sym(ip); print_ip_ins(" actual: ", (unsigned char *)ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); break; case -EPERM: FTRACE_WARN_ON_ONCE(1); @@ -1775,6 +1780,24 @@ void ftrace_bug(int failed, unsigned long ip) pr_info("ftrace faulted on unknown error "); print_ip_sym(ip); } + if (rec) { + struct ftrace_ops *ops = NULL; + + pr_info("ftrace record flags: %lx\n", rec->flags); + pr_cont(" (%ld)%s", ftrace_rec_count(rec), + rec->flags & FTRACE_FL_REGS ? 
" R" : " "); + if (rec->flags & FTRACE_FL_TRAMP_EN) { + ops = ftrace_find_tramp_ops_any(rec); + if (ops) + pr_cont("\ttramp: %pS", + (void *)ops->trampoline); + else + pr_cont("\ttramp: ERROR!"); + + } + ip = ftrace_get_addr_curr(rec); + pr_cont(" expected tramp: %lx\n", ip); + } } static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) @@ -2097,7 +2120,7 @@ void __weak ftrace_replace_code(int enable) do_for_each_ftrace_rec(pg, rec) { failed = __ftrace_replace_code(rec, enable); if (failed) { - ftrace_bug(failed, rec->ip); + ftrace_bug(failed, rec); /* Stop processing */ return; } @@ -2179,17 +2202,14 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) static int ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) { - unsigned long ip; int ret; - ip = rec->ip; - if (unlikely(ftrace_disabled)) return 0; ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); if (ret) { - ftrace_bug(ret, ip); + ftrace_bug(ret, rec); return 0; } return 1; @@ -2633,7 +2653,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) if (ftrace_start_up && cnt) { int failed = __ftrace_replace_code(p, 1); if (failed) - ftrace_bug(failed, p->ip); + ftrace_bug(failed, p); } } } From 632537256e9f969a188cc4d0159e0027a459d3e7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Jul 2014 21:35:01 +0200 Subject: [PATCH 05/55] tracing: Kill tracing_{start,stop}_sched_switch_record() and tracing_sched_switch_assign_trace() tracing_{start,stop}_sched_switch_record() have no callers since 87d80de2800d "tracing: Remove obsolete sched_switch tracer". The last caller of tracing_sched_switch_assign_trace() was removed by 30dbb20e68e6 "tracing: Remove boot tracer". Link: http://lkml.kernel.org/p/20140723193501.GA30214@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 3 -- kernel/trace/trace_sched_switch.c | 48 ------------------------------- 2 files changed, 51 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 385391fb1d3b..a3a82d5f25dc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -597,9 +597,6 @@ void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); -void tracing_sched_switch_assign_trace(struct trace_array *tr); -void tracing_stop_sched_switch_record(void); -void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 3f34dc9b40f3..3b60301c59d2 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -197,51 +197,3 @@ void tracing_stop_cmdline_record(void) { tracing_stop_sched_switch(); } - -/** - * tracing_start_sched_switch_record - start tracing context switches - * - * Turns on context switch tracing for a tracer. - */ -void tracing_start_sched_switch_record(void) -{ - if (unlikely(!ctx_trace)) { - WARN_ON(1); - return; - } - - tracing_start_sched_switch(); - - mutex_lock(&sched_register_mutex); - tracer_enabled++; - mutex_unlock(&sched_register_mutex); -} - -/** - * tracing_stop_sched_switch_record - start tracing context switches - * - * Turns off context switch tracing for a tracer. 
- */ -void tracing_stop_sched_switch_record(void) -{ - mutex_lock(&sched_register_mutex); - tracer_enabled--; - WARN_ON(tracer_enabled < 0); - mutex_unlock(&sched_register_mutex); - - tracing_stop_sched_switch(); -} - -/** - * tracing_sched_switch_assign_trace - assign a trace array for ctx switch - * @tr: trace array pointer to assign - * - * Some tracers might want to record the context switches in their - * trace. This function lets those tracers assign the trace array - * to use. - */ -void tracing_sched_switch_assign_trace(struct trace_array *tr) -{ - ctx_trace = tr; -} - From 458faf0b88b19a46d51bb9760fa6e03a1bc6d97b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Jul 2014 21:35:03 +0200 Subject: [PATCH 06/55] tracing: Kill the dead code in probe_sched_switch() and probe_sched_wakeup() After the previous patch it is clear that "tracer_enabled" can never be true, we can remove the "if (tracer_enabled)" code in probe_sched_switch() and probe_sched_wakeup(). Plus we can obviously remove tracer_enabled, ctx_trace, and sched_stopped as well. Link: http://lkml.kernel.org/p/20140723193503.GA30217@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_sched_switch.c | 40 ------------------------------- 1 file changed, 40 deletions(-) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 3b60301c59d2..f7c7f4f1544c 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -14,12 +14,8 @@ #include "trace.h" -static struct trace_array *ctx_trace; -static int __read_mostly tracer_enabled; static int sched_ref; static DEFINE_MUTEX(sched_register_mutex); -static int sched_stopped; - void tracing_sched_switch_trace(struct trace_array *tr, @@ -52,29 +48,11 @@ tracing_sched_switch_trace(struct trace_array *tr, static void probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) { - struct trace_array_cpu *data; - unsigned long flags; - int cpu; - int pc; - if (unlikely(!sched_ref)) return; tracing_record_cmdline(prev); tracing_record_cmdline(next); - - if (!tracer_enabled || sched_stopped) - return; - - pc = preempt_count(); - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); - - if (likely(!atomic_read(&data->disabled))) - tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); - - local_irq_restore(flags); } void @@ -108,28 +86,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, static void probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) { - struct trace_array_cpu *data; - unsigned long flags; - int cpu, pc; - if (unlikely(!sched_ref)) return; tracing_record_cmdline(current); - - if (!tracer_enabled || sched_stopped) - return; - - pc = preempt_count(); - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); - - if (likely(!atomic_read(&data->disabled))) - tracing_sched_wakeup_trace(ctx_trace, wakee, current, - flags, pc); - - local_irq_restore(flags); } static int tracing_sched_register(void) From 243f7610a68a606eb1787c09450a440bf30bebe0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 30 Oct 2014 20:44:53 -0400 Subject: [PATCH 07/55] tracing: Move tracing_sched_{switch,wakeup}() into wakeup tracer The only code that references tracing_sched_switch_trace() and tracing_sched_wakeup_trace() is the wakeup latency tracer. 
Those two functions used to belong to the sched_switch tracer, which has
long been removed. These functions were left behind because the wakeup
latency tracer used them. But since the wakeup latency tracer is the only
one to use them, they should be static functions inside that code.

Signed-off-by: Steven Rostedt
---
 kernel/trace/trace.h              |  9 -----
 kernel/trace/trace_sched_switch.c | 56 -------------------------------
 kernel/trace/trace_sched_wakeup.c | 56 +++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 65 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a3a82d5f25dc..3376de623ea0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -569,15 +569,6 @@ void trace_init_global_iter(struct trace_iterator *iter);
 
 void tracing_iter_reset(struct trace_iterator *iter, int cpu);
 
-void tracing_sched_switch_trace(struct trace_array *tr,
-				struct task_struct *prev,
-				struct task_struct *next,
-				unsigned long flags, int pc);
-
-void tracing_sched_wakeup_trace(struct trace_array *tr,
-				struct task_struct *wakee,
-				struct task_struct *cur,
-				unsigned long flags, int pc);
 void trace_function(struct trace_array *tr,
 		    unsigned long ip,
 		    unsigned long parent_ip,
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index f7c7f4f1544c..2e293beb186e 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -17,34 +17,6 @@
 static int sched_ref;
 static DEFINE_MUTEX(sched_register_mutex);
 
-void
-tracing_sched_switch_trace(struct trace_array *tr,
-			   struct task_struct *prev,
-			   struct task_struct *next,
-			   unsigned long flags, int pc)
-{
-	struct ftrace_event_call *call = &event_context_switch;
-	struct ring_buffer *buffer = tr->trace_buffer.buffer;
-	struct ring_buffer_event *event;
-	struct ctx_switch_entry *entry;
-
-	event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
-					  sizeof(*entry), flags, pc);
-	if (!event)
-		return;
-	entry = ring_buffer_event_data(event);
-	entry->prev_pid = prev->pid;
-	entry->prev_prio = prev->prio;
-	entry->prev_state = prev->state;
-	entry->next_pid = next->pid;
-	entry->next_prio = next->prio;
-	entry->next_state = next->state;
-	entry->next_cpu = task_cpu(next);
-
-	if (!call_filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, flags, pc);
-}
-
 static void
 probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
 {
@@ -55,34 +27,6 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n
 	tracing_record_cmdline(next);
 }
 
-void
-tracing_sched_wakeup_trace(struct trace_array *tr,
-			   struct task_struct *wakee,
-			   struct task_struct *curr,
-			   unsigned long flags, int pc)
-{
-	struct ftrace_event_call *call = &event_wakeup;
-	struct ring_buffer_event *event;
-	struct ctx_switch_entry *entry;
-	struct ring_buffer *buffer = tr->trace_buffer.buffer;
-
-	event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
-					  sizeof(*entry), flags, pc);
-	if (!event)
-		return;
-	entry = ring_buffer_event_data(event);
-	entry->prev_pid = curr->pid;
-	entry->prev_prio = curr->prio;
-	entry->prev_state = curr->state;
-	entry->next_pid = wakee->pid;
-	entry->next_prio = wakee->prio;
-	entry->next_state = wakee->state;
-	entry->next_cpu = task_cpu(wakee);
-
-	if (!call_filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, flags, pc);
-}
-
 static void
 probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
 {
diff --git a/kernel/trace/trace_sched_wakeup.c
b/kernel/trace/trace_sched_wakeup.c index 19bd8928ce94..8fb84b362816 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -365,6 +365,62 @@ probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu) wakeup_current_cpu = cpu; } +static void +tracing_sched_switch_trace(struct trace_array *tr, + struct task_struct *prev, + struct task_struct *next, + unsigned long flags, int pc) +{ + struct ftrace_event_call *call = &event_context_switch; + struct ring_buffer *buffer = tr->trace_buffer.buffer; + struct ring_buffer_event *event; + struct ctx_switch_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_CTX, + sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); + entry->prev_pid = prev->pid; + entry->prev_prio = prev->prio; + entry->prev_state = prev->state; + entry->next_pid = next->pid; + entry->next_prio = next->prio; + entry->next_state = next->state; + entry->next_cpu = task_cpu(next); + + if (!call_filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, flags, pc); +} + +static void +tracing_sched_wakeup_trace(struct trace_array *tr, + struct task_struct *wakee, + struct task_struct *curr, + unsigned long flags, int pc) +{ + struct ftrace_event_call *call = &event_wakeup; + struct ring_buffer_event *event; + struct ctx_switch_entry *entry; + struct ring_buffer *buffer = tr->trace_buffer.buffer; + + event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, + sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); + entry->prev_pid = curr->pid; + entry->prev_prio = curr->prio; + entry->prev_state = curr->state; + entry->next_pid = wakee->pid; + entry->next_prio = wakee->prio; + entry->next_state = wakee->state; + entry->next_cpu = task_cpu(wakee); + + if (!call_filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, flags, pc); +} + static void notrace probe_wakeup_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) From 26488b3723270880a28b542ff2276689506d6a9f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 22 Aug 2013 20:59:39 +0800 Subject: [PATCH 08/55] tracing: Add entry->next_cpu to trace_ctxwake_bin() Function trace_ctxwake_bin() misses ctx_switch_entry->next_cpu field, so user will get stale value for "next_cpu". Link: http://lkml.kernel.org/p/1377176379-27908-1-git-send-email-liuj97@gmail.com Signed-off-by: Jiang Liu Signed-off-by: Steven Rostedt --- kernel/trace/trace_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c6977d5a9b12..cfa91de22e27 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -976,6 +976,7 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, SEQ_PUT_FIELD_RET(s, field->prev_pid); SEQ_PUT_FIELD_RET(s, field->prev_prio); SEQ_PUT_FIELD_RET(s, field->prev_state); + SEQ_PUT_FIELD_RET(s, field->next_cpu); SEQ_PUT_FIELD_RET(s, field->next_pid); SEQ_PUT_FIELD_RET(s, field->next_prio); SEQ_PUT_FIELD_RET(s, field->next_state); From fe578ba36fe7a118298028031b3ce562e6ea1c4c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 13 Nov 2014 19:40:56 -0500 Subject: [PATCH 09/55] ftrace: Have the control_ops get a trampoline With the new logic, if only a single user of ftrace function hooks is used, it will get its own trampoline assigned to it. 
The problem is that the control_ops is an indirect ops that perf ops uses. What that means is that when perf registers its ops with register_ftrace_function(), it has the CONTROL flag set and gets added to the control list instead of the global ftrace list. The control_ops gets added to that instead and the mcount trampoline calls the control_ops function. The control_ops function will iterate the control list and call the ops functions that are attached to it. But currently the trampoline is added to the perf ops and not the control ops, and when ftrace tries to find a trampoline hook for it, it fails to find one and gives the following splat: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 10133 at kernel/trace/ftrace.c:2033 ftrace_get_addr_new+0x6f/0xc0() Modules linked in: [...] CPU: 0 PID: 10133 Comm: perf Tainted: P 3.18.0-rc1-test+ #388 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 00000000000007f1 ffff8800c2643bc8 ffffffff814fca6e ffff88011ea0ed01 0000000000000000 ffff8800c2643c08 ffffffff81041ffd 0000000000000000 ffffffff810c388c ffffffff81a5a350 ffff880119b00000 ffffffff810001c8 Call Trace: [] dump_stack+0x46/0x58 [] warn_slowpath_common+0x81/0x9b [] ? ftrace_get_addr_new+0x6f/0xc0 [] ? 0xffffffff810001c8 [] warn_slowpath_null+0x1a/0x1c [] ftrace_get_addr_new+0x6f/0xc0 [] ftrace_replace_code+0xd6/0x334 [] ftrace_modify_all_code+0x41/0xc5 [] arch_ftrace_update_code+0x10/0x19 [] ftrace_run_update_code+0x21/0x42 [] ftrace_startup_enable+0x32/0x34 [] ftrace_startup+0x14e/0x15a [] register_ftrace_function+0x27/0x40 [] perf_ftrace_event_register+0x3e/0xee [] perf_trace_init+0x29d/0x2a9 [] perf_tp_event_init+0x27/0x3a [] perf_init_event+0x9e/0xed [] perf_event_alloc+0x299/0x330 [] SYSC_perf_event_open+0x3ee/0x816 [] ? mntput+0x2d/0x2f [] ? __fput+0xa7/0x1b2 [] ? do_gettimeofday+0x22/0x3a [] SyS_perf_event_open+0x9/0xb [] system_call_fastpath+0x12/0x17 ---[ end trace 81a53565150e4982 ]--- Bad trampoline accounting at: ffffffff810001c8 (run_init_process+0x0/0x2d) (10000001) Update the control_ops trampoline instead of the perf ops one. Reported-by: lkp@01.org Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4043332f6720..1a13e615a068 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -418,6 +418,8 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (control_ops_alloc(ops)) return -ENOMEM; add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); + /* The control_ops needs the trampoline update */ + ops = &control_ops; } else add_ftrace_ops(&ftrace_ops_list, ops); From 933ff9f20220e1339964c7a76a34ca9aaa4a8c37 Mon Sep 17 00:00:00 2001 From: "Luis Claudio R. Goncalves" Date: Wed, 12 Nov 2014 21:14:00 -0200 Subject: [PATCH 10/55] tracing: Fix traceoff_on_warning handling on boot command line According to the documentation, adding "traceoff_on_warning" to the boot command line should be enough to enable the feature. But right now it is necessary to specify "traceoff_on_warning=". Along with fixing that, also verify if the value passed, if any, is either "0" or "off". Link: http://lkml.kernel.org/r/20141112231400.GL12281@uudg.org Signed-off-by: Luis Claudio R. 
Goncalves
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a528392b1f4..75798ab8eb88 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -155,10 +155,11 @@ __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
 static int __init stop_trace_on_warning(char *str)
 {
-	__disable_trace_on_warning = 1;
+	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
+		__disable_trace_on_warning = 1;
 	return 1;
 }
-__setup("traceoff_on_warning=", stop_trace_on_warning);
+__setup("traceoff_on_warning", stop_trace_on_warning);
 
 static int __init boot_alloc_snapshot(char *str)
 {

From c270cc75cd22b606c901e606fbb93b002c2a655f Mon Sep 17 00:00:00 2001
From: Daniel Thompson
Date: Thu, 6 Nov 2014 12:41:55 +0000
Subject: [PATCH 11/55] tracing: kdb: Fix kernel panic during ftdump

Currently kdb's ftdump command unconditionally crashes due to a null
pointer de-reference whenever the command is run. This in turn causes
the kernel to panic. The abridged stacktrace (gathered with ARCH=arm) is:

--- cut here ---
[] (panic) from [] (die+0x264/0x440)
[] (die) from [] (__do_kernel_fault.part.11+0x74/0x84)
[] (__do_kernel_fault.part.11) from [] (do_page_fault+0x1d0/0x3c4)
[] (do_page_fault) from [] (do_DataAbort+0x48/0xac)
[] (do_DataAbort) from [] (__dabt_svc+0x38/0x60)
Exception stack(0xc0deba88 to 0xc0debad0)
ba80: e8c29180 00000001 e9854304 e9854300 c0f567d8 c0df2580
baa0: 00000000 00000000 00000000 c0f117b8 c0e3a3c0 c0debb0c 00000000 c0debad0
bac0: 0000672e c02f4d60 60000193 ffffffff
[] (__dabt_svc) from [] (kdb_ftdump+0x1e4/0x3d8)
[] (kdb_ftdump) from [] (kdb_parse+0x2b8/0x698)
[] (kdb_parse) from [] (kdb_main_loop+0x52c/0x784)
[] (kdb_main_loop) from [] (kdb_stub+0x238/0x490)
--- cut here ---

The NULL deref occurs due to the uninitialized use of struct trace_iter's
buffer_iter member.

This is a regression, albeit a fairly elderly one. It was introduced by
commit 6d158a813efc ("tracing: Remove NR_CPUS array from trace_iterator").

This patch solves this by providing a collection of ring_buffer_iter(s)
and using this to initialize buffer_iter. Note that static allocation is
used solely because the trace_iter itself is also statically allocated.
Static allocation also means that we have to NULL-ify the pointer during
cleanup to avoid use-after-free problems.
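The cleanup half of the fix guards against a hazard that is generic to
static storage holding heap pointers. A userspace sketch of the pattern
(plain malloc/free stand in for ring_buffer_read_start() and
ring_buffer_read_finish(); dump_buffers() and this NR_CPUS are invented
for the example):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

/* static, so contents survive between invocations, like the kdb dumper's */
static int *buffer_iter[NR_CPUS];

static void dump_buffers(int cpu_file)
{
	int cpu;

	/* allocate for one cpu or for all, mirroring kdb's cpu_file logic */
	if (cpu_file >= 0)
		buffer_iter[cpu_file] = malloc(sizeof(int));
	else
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			buffer_iter[cpu] = malloc(sizeof(int));

	/* ... walk the iterators and print entries ... */

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (buffer_iter[cpu]) {
			free(buffer_iter[cpu]);
			/* without this, a later run that allocated fewer
			 * entries would free stale pointers from this run */
			buffer_iter[cpu] = NULL;
		}
	}
}

int main(void)
{
	dump_buffers(-1);	/* all cpus */
	dump_buffers(2);	/* one cpu: safe only because of the NULL-ify */
	return 0;
}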
Link: http://lkml.kernel.org/r/1415277716-19419-2-git-send-email-daniel.thompson@linaro.org
Cc: Jason Wessel
Signed-off-by: Daniel Thompson
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_kdb.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index bd90e1b06088..8faa7ce58814 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -20,10 +20,12 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
 {
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
+	static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS];
 	unsigned int old_userobj;
 	int cnt = 0, cpu;
 
 	trace_init_global_iter(&iter);
+	iter.buffer_iter = buffer_iter;
 
 	for_each_tracing_cpu(cpu) {
 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
@@ -86,9 +88,12 @@ out:
 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
 	}
 
-	for_each_tracing_cpu(cpu)
-		if (iter.buffer_iter[cpu])
+	for_each_tracing_cpu(cpu) {
+		if (iter.buffer_iter[cpu]) {
 			ring_buffer_read_finish(iter.buffer_iter[cpu]);
+			iter.buffer_iter[cpu] = NULL;
+		}
+	}
 }
 
 /*

From 8520dedbbf7578a397ecdfcf6ab83f775f914cfe Mon Sep 17 00:00:00 2001
From: Daniel Thompson
Date: Thu, 6 Nov 2014 12:41:56 +0000
Subject: [PATCH 12/55] tracing: kdb: Fix kernel livelock with empty buffers

Currently kdb's ftdump command will livelock by constantly printk'ing
the empty string at KERN_EMERG level if it is run when the ftrace system
is not in use. This occurs because trace_empty() never returns false
when the ring buffers are left at the start of a non-consuming read
[launched by ring_buffer_read_start()].

This patch changes the loop exit condition to use the result of
trace_find_next_entry_inc(). Effectively this switches the non-consuming
kdb dumper to follow the approach of the non-consuming userspace
interface [s_next()] rather than the consuming ftrace_dump().

Link: http://lkml.kernel.org/r/1415277716-19419-3-git-send-email-daniel.thompson@linaro.org
Cc: Ingo Molnar
Cc: Andrew Morton
Cc: John Stultz
Cc: Sumit Semwal
Cc: Jason Wessel
Signed-off-by: Daniel Thompson
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_kdb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 8faa7ce58814..b0b1c44e923a 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -59,19 +59,19 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
 		ring_buffer_read_start(iter.buffer_iter[cpu_file]);
 		tracing_iter_reset(&iter, cpu_file);
 	}
-	if (!trace_empty(&iter))
-		trace_find_next_entry_inc(&iter);
-	while (!trace_empty(&iter)) {
+
+	while (trace_find_next_entry_inc(&iter)) {
 		if (!cnt)
 			kdb_printf("---------------------------------\n");
 		cnt++;
 
-		if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines)
+		if (!skip_lines) {
 			print_trace_line(&iter);
-		if (!skip_lines)
 			trace_printk_seq(&iter.seq);
-		else
+		} else {
 			skip_lines--;
+		}
+
 		if (KDB_FLAG(CMD_INTERRUPT))
 			goto out;
 	}

From fa6f0cc751d377af3f4f1484bceb47dc10163753 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes
Date: Sat, 8 Nov 2014 21:42:10 +0100
Subject: [PATCH 13/55] tracing: Replace seq_printf by simpler equivalents

Using seq_printf to print a simple string or a single character is a lot
more expensive than it needs to be, since seq_puts and seq_putc exist.
These patches do seq_printf(m, s) -> seq_puts(m, s) seq_printf(m, "%s", s) -> seq_puts(m, s) seq_printf(m, "%c", c) -> seq_putc(m, c) Subsequent patches will simplify further. Link: http://lkml.kernel.org/r/1415479332-25944-2-git-send-email-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 30 +++++++++---------- kernel/trace/trace.c | 44 ++++++++++++++-------------- kernel/trace/trace_branch.c | 26 ++++++++-------- kernel/trace/trace_events.c | 4 +-- kernel/trace/trace_events_trigger.c | 2 +- kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_functions_graph.c | 28 +++++++++--------- kernel/trace/trace_kprobe.c | 4 +-- kernel/trace/trace_uprobe.c | 2 +- 9 files changed, 71 insertions(+), 71 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1a13e615a068..6233f9102179 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -571,13 +571,13 @@ static int function_stat_cmp(void *p1, void *p2) static int function_stat_headers(struct seq_file *m) { #ifdef CONFIG_FUNCTION_GRAPH_TRACER - seq_printf(m, " Function " - "Hit Time Avg s^2\n" - " -------- " - "--- ---- --- ---\n"); + seq_puts(m, " Function " + "Hit Time Avg s^2\n" + " -------- " + "--- ---- --- ---\n"); #else - seq_printf(m, " Function Hit\n" - " -------- ---\n"); + seq_puts(m, " Function Hit\n" + " -------- ---\n"); #endif return 0; } @@ -604,7 +604,7 @@ static int function_stat_show(struct seq_file *m, void *v) seq_printf(m, " %-30.30s %10lu", str, rec->counter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - seq_printf(m, " "); + seq_puts(m, " "); avg = rec->time; do_div(avg, rec->counter); @@ -3006,9 +3006,9 @@ static int t_show(struct seq_file *m, void *v) if (iter->flags & FTRACE_ITER_PRINTALL) { if (iter->flags & FTRACE_ITER_NOTRACE) - seq_printf(m, "#### no functions disabled ####\n"); + seq_puts(m, "#### no functions disabled ####\n"); else - seq_printf(m, "#### all functions enabled ####\n"); + seq_puts(m, "#### all functions enabled ####\n"); return 0; } @@ -3030,13 +3030,13 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "\ttramp: %pS", (void *)ops->trampoline); else - seq_printf(m, "\ttramp: ERROR!"); + seq_puts(m, "\ttramp: ERROR!"); } add_trampoline_func(m, ops, rec); } - seq_printf(m, "\n"); + seq_putc(m, '\n'); return 0; } @@ -4233,9 +4233,9 @@ static int g_show(struct seq_file *m, void *v) struct ftrace_graph_data *fgd = m->private; if (fgd->table == ftrace_graph_funcs) - seq_printf(m, "#### all functions enabled ####\n"); + seq_puts(m, "#### all functions enabled ####\n"); else - seq_printf(m, "#### no functions disabled ####\n"); + seq_puts(m, "#### no functions disabled ####\n"); return 0; } @@ -5155,12 +5155,12 @@ static int fpid_show(struct seq_file *m, void *v) const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list); if (v == (void *)1) { - seq_printf(m, "no pid\n"); + seq_puts(m, "no pid\n"); return 0; } if (fpid->pid == ftrace_swapper_pid) - seq_printf(m, "swapper tasks\n"); + seq_puts(m, "swapper tasks\n"); else seq_printf(m, "%u\n", pid_vnr(fpid->pid)); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 75798ab8eb88..a419db75152c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2860,44 +2860,44 @@ static void test_ftrace_alive(struct seq_file *m) { if (!ftrace_is_dead()) return; - seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); - seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); + seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); 
+ seq_puts(m, "# MAY BE MISSING FUNCTION EVENTS\n"); } #ifdef CONFIG_TRACER_MAX_TRACE static void show_snapshot_main_help(struct seq_file *m) { - seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); - seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); - seq_printf(m, "# Takes a snapshot of the main buffer.\n"); - seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"); - seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); - seq_printf(m, "# is not a '0' or '1')\n"); + seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); + seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); + seq_puts(m, "# Takes a snapshot of the main buffer.\n"); + seq_puts(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"); + seq_puts(m, "# (Doesn't have to be '2' works with any number that\n"); + seq_puts(m, "# is not a '0' or '1')\n"); } static void show_snapshot_percpu_help(struct seq_file *m) { - seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); + seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP - seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); - seq_printf(m, "# Takes a snapshot of the main buffer for this cpu.\n"); + seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); + seq_puts(m, "# Takes a snapshot of the main buffer for this cpu.\n"); #else - seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n"); - seq_printf(m, "# Must use main snapshot file to allocate.\n"); + seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"); + seq_puts(m, "# Must use main snapshot file to allocate.\n"); #endif - seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"); - seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); - seq_printf(m, "# is not a '0' or '1')\n"); + seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"); + seq_puts(m, "# (Doesn't have to be '2' works with any number that\n"); + seq_puts(m, "# is not a '0' or '1')\n"); } static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { if (iter->tr->allocated_snapshot) - seq_printf(m, "#\n# * Snapshot is allocated *\n#\n"); + seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); else - seq_printf(m, "#\n# * Snapshot is freed *\n#\n"); + seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); - seq_printf(m, "# Snapshot commands:\n"); + seq_puts(m, "# Snapshot commands:\n"); if (iter->cpu_file == RING_BUFFER_ALL_CPUS) show_snapshot_main_help(m); else @@ -3251,7 +3251,7 @@ static int t_show(struct seq_file *m, void *v) if (!t) return 0; - seq_printf(m, "%s", t->name); + seq_puts(m, t->name); if (t->next) seq_putc(m, ' '); else @@ -5753,10 +5753,10 @@ ftrace_snapshot_print(struct seq_file *m, unsigned long ip, seq_printf(m, "%ps:", (void *)ip); - seq_printf(m, "snapshot"); + seq_puts(m, "snapshot"); if (count == -1) - seq_printf(m, ":unlimited\n"); + seq_puts(m, ":unlimited\n"); else seq_printf(m, ":count=%ld\n", count); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 697fb9bac8f0..126c622e4f42 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -233,12 +233,12 @@ extern unsigned 
long __stop_annotated_branch_profile[]; static int annotated_branch_stat_headers(struct seq_file *m) { - seq_printf(m, " correct incorrect %% "); - seq_printf(m, " Function " - " File Line\n" - " ------- --------- - " - " -------- " - " ---- ----\n"); + seq_puts(m, " correct incorrect % "); + seq_puts(m, " Function " + " File Line\n" + " ------- --------- - " + " -------- " + " ---- ----\n"); return 0; } @@ -274,7 +274,7 @@ static int branch_stat_show(struct seq_file *m, void *v) seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); if (percent < 0) - seq_printf(m, " X "); + seq_puts(m, " X "); else seq_printf(m, "%3ld ", percent); seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); @@ -362,12 +362,12 @@ extern unsigned long __stop_branch_profile[]; static int all_branch_stat_headers(struct seq_file *m) { - seq_printf(m, " miss hit %% "); - seq_printf(m, " Function " - " File Line\n" - " ------- --------- - " - " -------- " - " ---- ----\n"); + seq_puts(m, " miss hit % "); + seq_puts(m, " Function " + " File Line\n" + " ------- --------- - " + " -------- " + " ---- ----\n"); return 0; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0cc51edde3a8..f9d0cbe014b7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -918,7 +918,7 @@ static int f_show(struct seq_file *m, void *v) case FORMAT_HEADER: seq_printf(m, "name: %s\n", ftrace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); - seq_printf(m, "format:\n"); + seq_puts(m, "format:\n"); return 0; case FORMAT_FIELD_SEPERATOR: @@ -1988,7 +1988,7 @@ event_enable_print(struct seq_file *m, unsigned long ip, ftrace_event_name(data->file->event_call)); if (data->count == -1) - seq_printf(m, ":unlimited\n"); + seq_puts(m, ":unlimited\n"); else seq_printf(m, ":count=%ld\n", data->count); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 4747b476a030..3d89ffc66740 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -373,7 +373,7 @@ event_trigger_print(const char *name, struct seq_file *m, { long count = (long)data; - seq_printf(m, "%s", name); + seq_puts(m, name); if (count == -1) seq_puts(m, ":unlimited"); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 57f0ec962d2c..a8e0c7666164 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -361,7 +361,7 @@ ftrace_probe_print(const char *name, struct seq_file *m, seq_printf(m, "%ps:%s", (void *)ip, name); if (count == -1) - seq_printf(m, ":unlimited\n"); + seq_puts(m, ":unlimited\n"); else seq_printf(m, ":count=%ld\n", count); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index f0a0c982cde3..29ba611e6f57 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1407,32 +1407,32 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) print_lat_header(s, flags); /* 1st line */ - seq_printf(s, "#"); + seq_puts(s, "#"); if (flags & TRACE_GRAPH_PRINT_ABS_TIME) - seq_printf(s, " TIME "); + seq_puts(s, " TIME "); if (flags & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, " CPU"); + seq_puts(s, " CPU"); if (flags & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, " TASK/PID "); + seq_puts(s, " TASK/PID "); if (lat) - seq_printf(s, "||||"); + seq_puts(s, "||||"); if (flags & TRACE_GRAPH_PRINT_DURATION) - seq_printf(s, " DURATION "); - seq_printf(s, " FUNCTION CALLS\n"); + seq_puts(s, " DURATION "); + 
seq_puts(s, " FUNCTION CALLS\n"); /* 2nd line */ - seq_printf(s, "#"); + seq_puts(s, "#"); if (flags & TRACE_GRAPH_PRINT_ABS_TIME) - seq_printf(s, " | "); + seq_puts(s, " | "); if (flags & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, " | "); + seq_puts(s, " | "); if (flags & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, " | | "); + seq_puts(s, " | | "); if (lat) - seq_printf(s, "||||"); + seq_puts(s, "||||"); if (flags & TRACE_GRAPH_PRINT_DURATION) - seq_printf(s, " | | "); - seq_printf(s, " | | | |\n"); + seq_puts(s, " | | "); + seq_puts(s, " | | | |\n"); } static void print_graph_headers(struct seq_file *s) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 282f6e4e5539..240d27ddf82e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -826,7 +826,7 @@ static int probes_seq_show(struct seq_file *m, void *v) struct trace_kprobe *tk = v; int i; - seq_printf(m, "%c", trace_kprobe_is_return(tk) ? 'r' : 'p'); + seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p'); seq_printf(m, ":%s/%s", tk->tp.call.class->system, ftrace_event_name(&tk->tp.call)); @@ -840,7 +840,7 @@ static int probes_seq_show(struct seq_file *m, void *v) for (i = 0; i < tk->tp.nr_args; i++) seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm); - seq_printf(m, "\n"); + seq_putc(m, '\n'); return 0; } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 33ff6a24b802..d22117beedd0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -606,7 +606,7 @@ static int probes_seq_show(struct seq_file *m, void *v) for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); - seq_printf(m, "\n"); + seq_putc(m, '\n'); return 0; } From d79ac28fde9c245473ba5f337341eb5823c7ebfc Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Sat, 8 Nov 2014 21:42:11 +0100 Subject: [PATCH 14/55] tracing: Merge consecutive seq_puts calls Consecutive seq_puts calls with literal strings can be merged to a single call. This reduces the size of the generated code, and can also lead to slight .rodata reduction (because of fewer nul and padding bytes). It should also shave a off a few clock cycles. 
Link: http://lkml.kernel.org/r/1415479332-25944-3-git-send-email-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 64 ++++++++++++++++++------------------- kernel/trace/trace_branch.c | 14 ++++---- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a419db75152c..44d561426700 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2509,14 +2509,14 @@ get_total_entries(struct trace_buffer *buf, static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n"); - seq_puts(m, "# / _-----=> irqs-off \n"); - seq_puts(m, "# | / _----=> need-resched \n"); - seq_puts(m, "# || / _---=> hardirq/softirq \n"); - seq_puts(m, "# ||| / _--=> preempt-depth \n"); - seq_puts(m, "# |||| / delay \n"); - seq_puts(m, "# cmd pid ||||| time | caller \n"); - seq_puts(m, "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# _------=> CPU# \n" + "# / _-----=> irqs-off \n" + "# | / _----=> need-resched \n" + "# || / _---=> hardirq/softirq \n" + "# ||| / _--=> preempt-depth \n" + "# |||| / delay \n" + "# cmd pid ||||| time | caller \n" + "# \\ / ||||| \\ | / \n"); } static void print_event_info(struct trace_buffer *buf, struct seq_file *m) @@ -2533,20 +2533,20 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m) static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) { print_event_info(buf, m); - seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); - seq_puts(m, "# | | | | |\n"); + seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n" + "# | | | | |\n"); } static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) { print_event_info(buf, m); - seq_puts(m, "# _-----=> irqs-off\n"); - seq_puts(m, "# / _----=> need-resched\n"); - seq_puts(m, "# | / _---=> hardirq/softirq\n"); - seq_puts(m, "# || / _--=> preempt-depth\n"); - seq_puts(m, "# ||| / delay\n"); - seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"); - seq_puts(m, "# | | | |||| | |\n"); + seq_puts(m, "# _-----=> irqs-off\n" + "# / _----=> need-resched\n" + "# | / _---=> hardirq/softirq\n" + "# || / _--=> preempt-depth\n" + "# ||| / delay\n" + "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" + "# | | | |||| | |\n"); } void @@ -2860,34 +2860,34 @@ static void test_ftrace_alive(struct seq_file *m) { if (!ftrace_is_dead()) return; - seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); - seq_puts(m, "# MAY BE MISSING FUNCTION EVENTS\n"); + seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" + "# MAY BE MISSING FUNCTION EVENTS\n"); } #ifdef CONFIG_TRACER_MAX_TRACE static void show_snapshot_main_help(struct seq_file *m) { - seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); - seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); - seq_puts(m, "# Takes a snapshot of the main buffer.\n"); - seq_puts(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"); - seq_puts(m, "# (Doesn't have to be '2' works with any number that\n"); - seq_puts(m, "# is not a '0' or '1')\n"); + seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" + "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" + "# Takes a snapshot of the main buffer.\n" + "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" + "# (Doesn't have to be '2' works with any number that\n" + "# is not a '0' or '1')\n"); } static void 
show_snapshot_percpu_help(struct seq_file *m) { seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP - seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); - seq_puts(m, "# Takes a snapshot of the main buffer for this cpu.\n"); + seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" + "# Takes a snapshot of the main buffer for this cpu.\n"); #else - seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"); - seq_puts(m, "# Must use main snapshot file to allocate.\n"); + seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" + "# Must use main snapshot file to allocate.\n"); #endif - seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"); - seq_puts(m, "# (Doesn't have to be '2' works with any number that\n"); - seq_puts(m, "# is not a '0' or '1')\n"); + seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" + "# (Doesn't have to be '2' works with any number that\n" + "# is not a '0' or '1')\n"); } static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 126c622e4f42..a3916f68a1c7 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -164,9 +164,9 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, static void branch_print_header(struct seq_file *s) { seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT" - " FUNC:FILE:LINE\n"); - seq_puts(s, "# | | | | | " - " |\n"); + " FUNC:FILE:LINE\n" + "# | | | | | " + " |\n"); } static struct trace_event_functions trace_branch_funcs = { @@ -233,8 +233,8 @@ extern unsigned long __stop_annotated_branch_profile[]; static int annotated_branch_stat_headers(struct seq_file *m) { - seq_puts(m, " correct incorrect % "); - seq_puts(m, " Function " + seq_puts(m, " correct incorrect % " + " Function " " File Line\n" " ------- --------- - " " -------- " @@ -362,8 +362,8 @@ extern unsigned long __stop_branch_profile[]; static int all_branch_stat_headers(struct seq_file *m) { - seq_puts(m, " miss hit % "); - seq_puts(m, " Function " + seq_puts(m, " miss hit % " + " Function " " File Line\n" " ------- --------- - " " -------- " From 1177e4364154a00baf2c9eb72fd960f0c5a8de84 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Sat, 8 Nov 2014 21:42:12 +0100 Subject: [PATCH 15/55] trace: Replace single-character seq_puts with seq_putc Printing a single character to a seqfile might as well be done with seq_putc instead of seq_puts; this avoids a strlen() call and a memory access. It also shaves another few bytes off the generated code. 
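The saving comes from seq_puts() taking a pointer and having to find the string's length, while seq_putc() is handed the character directly. A rough user-space analogue of the swap, using stdio stand-ins for the seq_file calls:

#include <stdio.h>

int main(void)
{
	/* seq_puts(m, "\n") analogue: a two-byte string ("\n" plus its
	 * nul terminator) is stored and then read back through a pointer */
	fputs("\n", stdout);

	/* seq_putc(m, '\n') analogue: the character travels by value,
	 * with no string literal and no length computation */
	fputc('\n', stdout);

	return 0;
}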
Link: http://lkml.kernel.org/r/1415479332-25944-4-git-send-email-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_trigger.c | 4 ++-- kernel/trace/trace_functions_graph.c | 4 ++-- kernel/trace/trace_kprobe.c | 8 ++++---- kernel/trace/trace_printk.c | 2 +- kernel/trace/trace_uprobe.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 3d89ffc66740..8712df9decb4 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -383,7 +383,7 @@ event_trigger_print(const char *name, struct seq_file *m, if (filter_str) seq_printf(m, " if %s\n", filter_str); else - seq_puts(m, "\n"); + seq_putc(m, '\n'); return 0; } @@ -1105,7 +1105,7 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, if (data->filter_str) seq_printf(m, " if %s\n", data->filter_str); else - seq_puts(m, "\n"); + seq_putc(m, '\n'); return 0; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 29ba611e6f57..a82d1d3bd76d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1407,7 +1407,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) print_lat_header(s, flags); /* 1st line */ - seq_puts(s, "#"); + seq_putc(s, '#'); if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_puts(s, " TIME "); if (flags & TRACE_GRAPH_PRINT_CPU) @@ -1421,7 +1421,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) seq_puts(s, " FUNCTION CALLS\n"); /* 2nd line */ - seq_puts(s, "#"); + seq_putc(s, '#'); if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_puts(s, " | "); if (flags & TRACE_GRAPH_PRINT_CPU) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 240d27ddf82e..ef7e7f16ed1a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1030,7 +1030,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags, if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) goto partial; - if (!trace_seq_puts(s, ")")) + if (!trace_seq_putc(s, ')')) goto partial; data = (u8 *)&field[1]; @@ -1039,7 +1039,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags, data + tp->args[i].offset, field)) goto partial; - if (!trace_seq_puts(s, "\n")) + if (!trace_seq_putc(s, '\n')) goto partial; return TRACE_TYPE_HANDLED; @@ -1072,7 +1072,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) goto partial; - if (!trace_seq_puts(s, ")")) + if (!trace_seq_putc(s, ')')) goto partial; data = (u8 *)&field[1]; @@ -1081,7 +1081,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, data + tp->args[i].offset, field)) goto partial; - if (!trace_seq_puts(s, "\n")) + if (!trace_seq_putc(s, '\n')) goto partial; return TRACE_TYPE_HANDLED; diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 2900817ba65c..c4e70b6bd7fa 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -305,7 +305,7 @@ static int t_show(struct seq_file *m, void *v) seq_puts(m, "\\t"); break; case '\\': - seq_puts(m, "\\"); + seq_putc(m, '\\'); break; case '"': seq_puts(m, "\\\""); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d22117beedd0..e35327c787f7 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -872,7 +872,7 @@ 
print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e goto partial; } - if (trace_seq_puts(s, "\n")) + if (trace_seq_putc(s, '\n')) return TRACE_TYPE_HANDLED; partial: From 678f845ed0f40c29a74cd8dd60c48f2a6db44dc9 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 6 Nov 2014 15:25:09 -0200 Subject: [PATCH 16/55] ftrace-graph: show latency-format on print_graph_irq() On the function_graph tracer, the print_graph_irq() function prints a trace line with the flag ==========> on an irq handler entry, and the flag <========== on an irq handler return. But when the latency format is enabled, it does not print the latency-format flags, causing the following error in the trace output: 0) ==========> | 0) d... | smp_apic_timer_interrupt() { This patch fixes this issue by printing the latency-format flags when the latency format is enabled. Link: http://lkml.kernel.org/r/7c2e226dac20c940b6242178fab7f0e3c9b5ce58.1415233316.git.bristot@redhat.com Reviewed-by: Luis Claudio R. Goncalves Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a82d1d3bd76d..93ebba09622e 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -699,6 +699,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, { int ret; struct trace_seq *s = &iter->seq; + struct trace_entry *ent = iter->ent; if (addr < (unsigned long)__irqentry_text_start || addr >= (unsigned long)__irqentry_text_end) @@ -728,6 +729,14 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, if (!ret) return TRACE_TYPE_PARTIAL_LINE; } + + /* Latency format */ + if (trace_flags & TRACE_ITER_LATENCY_FMT) { + ret = print_graph_lat_fmt(s, ent); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + } /* No overhead */ From 4526d0676a150dce7a93ad93e03bef7f77e7c906 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 5 Nov 2014 16:18:44 +0900 Subject: [PATCH 17/55] function_graph: Fix micro seconds notations Usually, the "msecs" notation means milliseconds, and the "usecs" notation means microseconds. Since the unit used in the code is microseconds, the notation should be changed from msecs to usecs.
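The unit can be read off the conversion at the top of trace_print_graph_duration() in the diff below: the raw duration arrives in nanoseconds, and do_div(duration, 1000) leaves microseconds in duration with the nanosecond remainder returned. A user-space sketch of that split, with plain division standing in for the kernel's do_div():

#include <stdio.h>

int main(void)
{
	unsigned long long duration = 123456789ULL;	/* nanoseconds */

	/* do_div(duration, 1000) analogue: quotient in usecs,
	 * remainder in nsecs */
	unsigned long nsecs_rem = duration % 1000;
	duration /= 1000;

	printf("%llu.%03lu usecs\n", duration, nsecs_rem);	/* 123456.789 usecs */
	return 0;
}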
Link: http://lkml.kernel.org/r/1415171926-9782-2-git-send-email-byungchul.park@lge.com Signed-off-by: Byungchul Park Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 93ebba09622e..55bdf9d7f714 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -768,19 +768,19 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) { unsigned long nsecs_rem = do_div(duration, 1000); /* log10(ULONG_MAX) + '\0' */ - char msecs_str[21]; + char usecs_str[21]; char nsecs_str[5]; int ret, len; int i; - sprintf(msecs_str, "%lu", (unsigned long) duration); + sprintf(usecs_str, "%lu", (unsigned long) duration); /* Print msecs */ - ret = trace_seq_printf(s, "%s", msecs_str); + ret = trace_seq_printf(s, "%s", usecs_str); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - len = strlen(msecs_str); + len = strlen(usecs_str); /* Print nsecs (we don't want to exceed 7 numbers) */ if (len < 7) { @@ -831,10 +831,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, /* Signal a overhead of time execution to the output */ if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { - /* Duration exceeded 100 msecs */ + /* Duration exceeded 100 usecs */ if (duration > 100000ULL) ret = trace_seq_puts(s, "! "); - /* Duration exceeded 10 msecs */ + /* Duration exceeded 10 usecs */ else if (duration > 10000ULL) ret = trace_seq_puts(s, "+ "); } From a9ce7c36aa4256019180c590d60e2fad7431c749 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 17 Nov 2014 23:08:24 -0500 Subject: [PATCH 18/55] tracing: Fix race of function probes counting The function probe counting for traceon and traceoff suffered a race condition where if the probe was executing on two or more CPUs at the same time, it could decrement the counter by more than one when disabling (or enabling) the tracer only once. The way the traceon and traceoff probes are supposed to work is that they disable (or enable) tracing once per count. If a user were to echo 'schedule:traceoff:3' into set_ftrace_filter, then when the schedule function was called, it would disable tracing. But the count should only be decremented once (to 2). Then if the user enabled tracing again (via the tracing_on file), the next call to schedule would disable tracing again and the count would be decremented to 1. But if multiple CPUs called schedule at the same time, it is possible that the count would be decremented more than once because of the simple "count--" used. By reading the count into a local variable and using memory barriers, we can guarantee that the count is only decremented once per disable (or enable). The stack trace probe had a similar race, but here the stack trace decrements the count each time it is called. This had the read-modify-write race, where it could stack trace more than the number of times that was specified. In this case we use a cmpxchg to stack trace only the number of times specified. The dump probes can still use the old "update_count()" function as they only run once, and that is controlled by the dump logic itself.
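The cmpxchg half of the fix is a standard compare-and-swap retry loop: read the count, compute count - 1, and only take the stack trace if no other CPU changed the count in between. A user-space sketch of the same idea using C11 atomics (the kernel's cmpxchg() returns the old value, whereas atomic_compare_exchange_strong() returns a success flag):

#include <stdatomic.h>
#include <stdio.h>

static atomic_long count = 3;	/* e.g. set via 'func:stacktrace:3' */

static void stacktrace_probe(void)
{
	long old_count, new_count;

	do {
		old_count = atomic_load(&count);
		if (old_count == 0)
			return;			/* budget exhausted */
		if (old_count == -1) {
			puts("stack trace");	/* unlimited: no decrement */
			return;
		}
		new_count = old_count - 1;
		/* Only one CPU can win the swap for a given old_count,
		 * so the counter drops exactly once per stack trace. */
	} while (!atomic_compare_exchange_strong(&count, &old_count, new_count));

	puts("stack trace");
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		stacktrace_probe();	/* prints exactly three times */
	return 0;
}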
Link: http://lkml.kernel.org/r/20141118134643.4b550ee4@gandalf.local.home Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions.c | 117 +++++++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 21 deletions(-) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index a8e0c7666164..973db52eb070 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -261,37 +261,74 @@ static struct tracer function_trace __tracer_data = }; #ifdef CONFIG_DYNAMIC_FTRACE -static int update_count(void **data) +static void update_traceon_count(void **data, int on) { - unsigned long *count = (long *)data; + long *count = (long *)data; + long old_count = *count; - if (!*count) - return 0; + /* + * Tracing gets disabled (or enabled) once per count. + * This function can be called at the same time on mulitple CPUs. + * It is fine if both disable (or enable) tracing, as disabling + * (or enabling) the second time doesn't do anything as the + * state of the tracer is already disabled (or enabled). + * What needs to be synchronized in this case is that the count + * only gets decremented once, even if the tracer is disabled + * (or enabled) twice, as the second one is really a nop. + * + * The memory barriers guarantee that we only decrement the + * counter once. First the count is read to a local variable + * and a read barrier is used to make sure that it is loaded + * before checking if the tracer is in the state we want. + * If the tracer is not in the state we want, then the count + * is guaranteed to be the old count. + * + * Next the tracer is set to the state we want (disabled or enabled) + * then a write memory barrier is used to make sure that + * the new state is visible before changing the counter by + * one minus the old counter. This guarantees that another CPU + * executing this code will see the new state before seeing + * the new counter value, and would not do anthing if the new + * counter is seen. + * + * Note, there is no synchronization between this and a user + * setting the tracing_on file. But we currently don't care + * about that. + */ + if (!old_count) + return; - if (*count != -1) - (*count)--; + /* Make sure we see count before checking tracing state */ + smp_rmb(); - return 1; + if (on == !!tracing_is_on()) + return; + + if (on) + tracing_on(); + else + tracing_off(); + + /* unlimited? */ + if (old_count == -1) + return; + + /* Make sure tracing state is visible before updating count */ + smp_wmb(); + + *count = old_count - 1; } static void ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data) { - if (tracing_is_on()) - return; - - if (update_count(data)) - tracing_on(); + update_traceon_count(data, 1); } static void ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data) { - if (!tracing_is_on()) - return; - - if (update_count(data)) - tracing_off(); + update_traceon_count(data, 0); } static void @@ -330,11 +367,49 @@ ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, void **data) static void ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) { - if (!tracing_is_on()) - return; + long *count = (long *)data; + long old_count; + long new_count; - if (update_count(data)) - trace_dump_stack(STACK_SKIP); + /* + * Stack traces should only execute the number of times the + * user specified in the counter. + */ + do { + + if (!tracing_is_on()) + return; + + old_count = *count; + + if (!old_count) + return; + + /* unlimited? 
*/ + if (old_count == -1) { + trace_dump_stack(STACK_SKIP); + return; + } + + new_count = old_count - 1; + new_count = cmpxchg(count, old_count, new_count); + if (new_count == old_count) + trace_dump_stack(STACK_SKIP); + + } while (new_count != old_count); +} + +static int update_count(void **data) +{ + unsigned long *count = (long *)data; + + if (!*count) + return 0; + + if (*count != -1) + (*count)--; + + return 1; } static void From 9960efeb80f73bd073483dab0855ee0ddc27085c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 18 Nov 2014 19:13:25 -0500 Subject: [PATCH 19/55] ftrace/x86: Add frame pointers to trampoline as necessary When CONFIG_FRAME_POINTERS is enabled, it is required that the ftrace_caller and ftrace_regs_caller trampolines set up frame pointers; otherwise a stack trace from a function call won't print the functions that called the trampoline. This is due to a check in __save_stack_address(): #ifdef CONFIG_FRAME_POINTER if (!reliable) return; #endif The "reliable" variable is only set if the function address is equal to the contents of the address before the address the frame pointer register points to. If the frame pointer is not set up for the ftrace caller, then this will fail the reliable test. It will miss the function that called the trampoline. Worse yet, if fentry is used (gcc 4.6 and beyond), it will also miss the parent, as the fentry is called before the stack frame is set up. That means the bp frame pointer points to the stack just before the parent function was called. Link: http://lkml.kernel.org/r/20141119034829.355440340@goodmis.org Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: stable@vger.kernel.org # 3.7+ Acked-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 41 +++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 42f0cdd20baf..35a793fa4bba 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -47,14 +47,51 @@ GLOBAL(\trace_label) #endif .endm +#ifdef CONFIG_FRAME_POINTER +/* + * Stack traces will stop at the ftrace trampoline if the frame pointer + * is not set up properly. If fentry is used, we need to save a frame + * pointer for the parent as well as the function traced, because the + * fentry is called before the stack frame is set up, where as mcount
+ */ +.macro create_frame parent rip +#ifdef CC_USING_FENTRY + pushq \parent + pushq %rbp + movq %rsp, %rbp +#endif + pushq \rip + pushq %rbp + movq %rsp, %rbp +.endm + +.macro restore_frame +#ifdef CC_USING_FENTRY + addq $16, %rsp +#endif + popq %rbp + addq $8, %rsp +.endm +#else +.macro create_frame parent rip +.endm +.macro restore_frame +.endm +#endif /* CONFIG_FRAME_POINTER */ + ENTRY(ftrace_caller) ftrace_caller_setup ftrace_caller_op_ptr /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx + create_frame %rsi, %rdi + GLOBAL(ftrace_call) call ftrace_stub + restore_frame + MCOUNT_RESTORE_FRAME /* @@ -105,9 +142,13 @@ ENTRY(ftrace_regs_caller) /* regs go into 4th parameter */ leaq (%rsp), %rcx + create_frame %rsi, %rdi + GLOBAL(ftrace_regs_call) call ftrace_stub + restore_frame + /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax movq %rax, SS(%rsp) From aec0be2d6e9f02dbef41ee54854c2e003e55c23e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 18 Nov 2014 21:14:11 -0500 Subject: [PATCH 20/55] ftrace/x86/extable: Add is_ftrace_trampoline() function Stack traces that happen from function tracing check if the address on the stack is a __kernel_text_address(). That is, they check whether the address is kernel code. This calls core_kernel_text(), which returns true if the address is part of the built-in kernel code. It also calls is_module_text_address(), which returns true if the address belongs to module code. But what is missing is ftrace's dynamically allocated trampolines. These trampolines are allocated for individual ftrace_ops that call the ftrace_ops callback functions directly. But if they do a stack trace, the code checking the stack won't detect them, as they are neither core kernel code nor in the module address space. By adding another field to ftrace_ops that stores the size of the trampoline assigned to it, we can create a new function called is_ftrace_trampoline() that returns true if the address is a dynamically allocated ftrace trampoline. Note, it ignores trampolines that are not dynamically allocated, as they will return true with the core_kernel_text() function. Link: http://lkml.kernel.org/r/20141119034829.497125839@goodmis.org Cc: Ingo Molnar Cc: "H.
Peter Anvin" Acked-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 9 +++++++-- include/linux/ftrace.h | 8 ++++++++ kernel/extable.c | 7 ++++++- kernel/trace/ftrace.c | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1aea94d336c7..60881d919432 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -712,7 +712,8 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -static unsigned long create_trampoline(struct ftrace_ops *ops) +static unsigned long +create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) { unsigned const char *jmp; unsigned long start_offset; @@ -749,6 +750,8 @@ static unsigned long create_trampoline(struct ftrace_ops *ops) if (!trampoline) return 0; + *tramp_size = size + MCOUNT_INSN_SIZE + sizeof(void *); + /* Copy ftrace_caller onto the trampoline memory */ ret = probe_kernel_read(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) { @@ -819,6 +822,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) unsigned char *new; unsigned long offset; unsigned long ip; + unsigned int size; int ret; if (ops->trampoline) { @@ -829,9 +833,10 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; } else { - ops->trampoline = create_trampoline(ops); + ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; + ops->trampoline_size = size; } offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 619e37cc17fd..7b2616fa2472 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -150,6 +150,7 @@ struct ftrace_ops { struct ftrace_ops_hash *func_hash; struct ftrace_ops_hash old_hash; unsigned long trampoline; + unsigned long trampoline_size; #endif }; @@ -297,6 +298,8 @@ extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); +bool is_ftrace_trampoline(unsigned long addr); + /* * The dyn_ftrace record's flags field is split into two parts. * the first part which is '0-FTRACE_REF_MAX' is a counter of @@ -596,6 +599,11 @@ static inline ssize_t ftrace_notrace_write(struct file *file, const char __user size_t cnt, loff_t *ppos) { return -ENODEV; } static inline int ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } + +static inline bool is_ftrace_trampoline(unsigned long addr) +{ + return false; +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/extable.c b/kernel/extable.c index d8a6446adbcb..c98f926277a8 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -102,6 +103,8 @@ int __kernel_text_address(unsigned long addr) return 1; if (is_module_text_address(addr)) return 1; + if (is_ftrace_trampoline(addr)) + return 1; /* * There might be init symbols in saved stacktraces. 
* Give those symbols a chance to be printed in @@ -119,7 +122,9 @@ int kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) return 1; - return is_module_text_address(addr); + if (is_module_text_address(addr)) + return 1; + return is_ftrace_trampoline(addr); } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6233f9102179..fa0f36bb32e9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1117,6 +1117,43 @@ static struct ftrace_ops global_ops = { FTRACE_OPS_FL_INITIALIZED, }; +/* + * This is used by __kernel_text_address() to return true if the + * the address is on a dynamically allocated trampoline that would + * not return true for either core_kernel_text() or + * is_module_text_address(). + */ +bool is_ftrace_trampoline(unsigned long addr) +{ + struct ftrace_ops *op; + bool ret = false; + + /* + * Some of the ops may be dynamically allocated, + * they are freed after a synchronize_sched(). + */ + preempt_disable_notrace(); + + do_for_each_ftrace_op(op, ftrace_ops_list) { + /* + * This is to check for dynamically allocated trampolines. + * Trampolines that are in kernel text will have + * core_kernel_text() return true. + */ + if (op->trampoline && op->trampoline_size) + if (addr >= op->trampoline && + addr < op->trampoline + op->trampoline_size) { + ret = true; + goto out; + } + } while_for_each_ftrace_op(op); + + out: + preempt_enable_notrace(); + + return ret; +} + struct ftrace_page { struct ftrace_page *next; struct dyn_ftrace *records; @@ -5373,6 +5410,7 @@ static struct ftrace_ops graph_ops = { FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, + /* trampoline_size is only needed for dynamically allocated tramps */ #endif ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; From 467aa1f276a0cf7445a1f4b9600f03d949b76251 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 24 Sep 2014 18:52:39 -0400 Subject: [PATCH 21/55] x86/kvm/tracing: Use helper function trace_seq_buffer_ptr() To allow for the restructuring of the trace_seq code, we need users of it to use the helper functions instead of accessing the internals of the trace_seq structure itself. Link: http://lkml.kernel.org/r/20141104160221.585025609@goodmis.org Tested-by: Jiri Kosina Acked-by: Jiri Kosina Acked-by: Paolo Bonzini Acked-by: Mark Rustad Reviewed-by: Petr Mladek Cc: Jeff Kirsher Signed-off-by: Steven Rostedt --- arch/x86/kvm/mmutrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 5aaf35641768..ce463a9cc8fb 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,7 +22,7 @@ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ - const u32 saved_len = p->len; \ + const char *saved_ptr = trace_seq_buffer_ptr(p); \ static const char *access_str[] = { \ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \ }; \ @@ -41,7 +41,7 @@ role.nxe ? "" : "!", \ __entry->root_count, \ __entry->unsync ? "unsync" : "sync", 0); \ - p->buffer + saved_len; \ + saved_ptr; \ }) #define kvm_mmu_trace_pferr_flags \ From dbcf3e06cae0424d85c8dd1a1b93e25e84b06a8d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 31 Oct 2014 19:43:08 -0400 Subject: [PATCH 22/55] RAS/tracing: Use trace_seq_buffer_ptr() helper instead of open coded Use the helper function trace_seq_buffer_ptr() to get the current location of the next buffer write of a trace_seq object, instead of open coding it.
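Both forms compute the same address; what the helper buys is that the structure's layout stays private. A self-contained sketch of the substitution, using a simplified stand-in for the kernel's struct trace_seq (a buffer plus a len write offset, as in these patches):

/* Simplified stand-in for the kernel's struct trace_seq */
struct trace_seq {
	char buffer[4096];
	unsigned int len;	/* next write position */
};

/* The helper, shaped like the one in include/linux/trace_seq.h */
static inline char *trace_seq_buffer_ptr(struct trace_seq *s)
{
	return s->buffer + s->len;
}

int main(void)
{
	struct trace_seq s = { .len = 10 };

	/* Open coded: reaches into the structure's internals */
	char *open_coded = s.buffer + s.len;

	/* Via the helper: same pointer, but the layout stays private */
	char *via_helper = trace_seq_buffer_ptr(&s);

	return open_coded == via_helper ? 0 : 1;
}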
This facilitates the conversion of trace_seq to use seq_buf. Tested-by: Jiri Kosina Acked-by: Jiri Kosina Acked-by: Borislav Petkov Reviewed-by: Petr Mladek Cc: Chen Gong Cc: Tony Luck Signed-off-by: Steven Rostedt --- drivers/firmware/efi/cper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 5b53d6183b6b..4fd9961d552e 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -294,7 +294,7 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *mem, const char *cper_mem_err_unpack(struct trace_seq *p, struct cper_mem_err_compact *cmem) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); if (cper_mem_err_location(cmem, rcd_decode_str)) trace_seq_printf(p, "%s", rcd_decode_str); From e400a40cffa77e45c60c5431ec8757579247cae2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 21:11:59 -0500 Subject: [PATCH 23/55] tracing: Fix trace_seq_bitmask() to start at current position trace_seq_bitmask() calls bitmap_scnprintf() not from the current position of the trace_seq buffer (s->buffer + s->len), but instead from the beginning of the buffer (s->buffer). Luckily, the only user of this, the "ipi_raise" tracepoint, uses it as the first parameter, and as such writes at the start of the temp buffer in include/trace/ftrace.h (see __get_bitmask()). Reported-by: Petr Mladek Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/trace_seq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 1f24ed99dca2..b100994a17fe 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -122,7 +122,7 @@ int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, if (s->full || !len) return 0; - ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); + ret = bitmap_scnprintf(s->buffer + s->len, len, maskp, nmaskbits); s->len += ret; return 1; From 19a7fe206232cc875a3083211e0a21c08edd756e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 10:29:54 -0500 Subject: [PATCH 24/55] tracing: Add trace_seq_has_overflowed() and trace_handle_return() Adding trace_seq_has_overflowed(), which returns true if the trace_seq had too much written into it, allows us to simplify the code. Instead of checking the return value of every call to trace_seq_printf() and friends, they can all be called normally, and at the end we can return !trace_seq_has_overflowed() instead. Several functions also return TRACE_TYPE_PARTIAL_LINE when the trace_seq overflowed and TRACE_TYPE_HANDLED otherwise. Another helper function, trace_handle_return(), was created; it takes a trace_seq and returns these enums. Using this helper function also simplifies the code. This change also makes it possible to remove the return values of trace_seq_printf() and friends. They should instead just be void functions.
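The shape of the simplification: callers stop testing every print for failure and instead let the overflow state latch inside the trace_seq, testing it once at the end. A self-contained before/after sketch with minimal stand-ins for the trace_seq API (trace_handle_return() itself is the helper shown in the diff below):

#include <stdbool.h>

enum print_line_t { TRACE_TYPE_HANDLED, TRACE_TYPE_PARTIAL_LINE };

struct trace_seq { bool full; };	/* stand-in: just the overflow latch */

/* Old style: each call reports success, so every caller must check */
static bool trace_seq_puts_old(struct trace_seq *s, const char *str)
{
	(void)str;
	return !s->full;
}

/* New style: void call; overflow is remembered in the trace_seq */
static void trace_seq_puts_new(struct trace_seq *s, const char *str)
{
	(void)s; (void)str;
}

static bool trace_seq_has_overflowed(struct trace_seq *s)
{
	return s->full;
}

static enum print_line_t trace_handle_return(struct trace_seq *s)
{
	return trace_seq_has_overflowed(s) ?
		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}

/* Before: a goto on every failed call */
static enum print_line_t output_before(struct trace_seq *s)
{
	if (!trace_seq_puts_old(s, "a"))
		goto partial;
	if (!trace_seq_puts_old(s, "b"))
		goto partial;
	return TRACE_TYPE_HANDLED;
 partial:
	return TRACE_TYPE_PARTIAL_LINE;
}

/* After: unconditional calls, one overflow test at the end */
static enum print_line_t output_after(struct trace_seq *s)
{
	trace_seq_puts_new(s, "a");
	trace_seq_puts_new(s, "b");
	return trace_handle_return(s);
}

int main(void)
{
	struct trace_seq s = { false };
	return output_before(&s) == output_after(&s) ? 0 : 1;
}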
Link: http://lkml.kernel.org/r/20141114011410.365183157@goodmis.org Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 11 + include/linux/trace_seq.h | 12 + include/trace/ftrace.h | 6 +- kernel/trace/trace.c | 69 +++--- kernel/trace/trace.h | 1 + kernel/trace/trace_output.c | 412 ++++++++++++++--------------------- kernel/trace/trace_output.h | 14 +- 7 files changed, 228 insertions(+), 297 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 28672e87e910..0bebb5c348b8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -138,6 +138,17 @@ enum print_line_t { TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; +/* + * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq + * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function + * simplifies those functions and keeps them in sync. + */ +static inline enum print_line_t trace_handle_return(struct trace_seq *s) +{ + return trace_seq_has_overflowed(s) ? + TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; +} + void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index ea6c9dea79e3..07eda413dfcf 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -40,6 +40,18 @@ trace_seq_buffer_ptr(struct trace_seq *s) return s->buffer + s->len; } +/** + * trace_seq_has_overflowed - return true if the trace_seq took too much + * @s: trace sequence descriptor + * + * Returns true if too much data was added to the trace_seq and it is + * now full and will not take anymore. + */ +static inline bool trace_seq_has_overflowed(struct trace_seq *s) +{ + return s->full || s->len > PAGE_SIZE - 1; +} + /* * Currently only defined when tracing is enabled. 
*/ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 26b4f2e13275..f13471b5d27a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -280,11 +280,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \ if (ret) \ return ret; \ \ - ret = trace_seq_printf(s, print); \ - if (!ret) \ - return TRACE_TYPE_PARTIAL_LINE; \ + trace_seq_printf(s, print); \ \ - return TRACE_TYPE_HANDLED; \ + return trace_handle_return(s); \ } \ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ .trace = ftrace_raw_output_##call, \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 44d561426700..3ce3c4ccfc94 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2649,24 +2649,21 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - if (iter->iter_flags & TRACE_FILE_LAT_FMT) { - if (!trace_print_lat_context(iter)) - goto partial; - } else { - if (!trace_print_context(iter)) - goto partial; - } + if (iter->iter_flags & TRACE_FILE_LAT_FMT) + trace_print_lat_context(iter); + else + trace_print_context(iter); } + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; + if (event) return event->funcs->trace(iter, sym_flags, event); - if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) - goto partial; + trace_seq_printf(s, "Unknown type %d\n", entry->type); - return TRACE_TYPE_HANDLED; -partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } static enum print_line_t print_raw_fmt(struct trace_iterator *iter) @@ -2677,22 +2674,20 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) entry = iter->ent; - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - if (!trace_seq_printf(s, "%d %d %llu ", - entry->pid, iter->cpu, iter->ts)) - goto partial; - } + if (trace_flags & TRACE_ITER_CONTEXT_INFO) + trace_seq_printf(s, "%d %d %llu ", + entry->pid, iter->cpu, iter->ts); + + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; event = ftrace_find_event(entry->type); if (event) return event->funcs->raw(iter, 0, event); - if (!trace_seq_printf(s, "%d ?\n", entry->type)) - goto partial; + trace_seq_printf(s, "%d ?\n", entry->type); - return TRACE_TYPE_HANDLED; -partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } static enum print_line_t print_hex_fmt(struct trace_iterator *iter) @@ -2705,9 +2700,11 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) entry = iter->ent; if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - SEQ_PUT_HEX_FIELD_RET(s, entry->pid); - SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); - SEQ_PUT_HEX_FIELD_RET(s, iter->ts); + SEQ_PUT_HEX_FIELD(s, entry->pid); + SEQ_PUT_HEX_FIELD(s, iter->cpu); + SEQ_PUT_HEX_FIELD(s, iter->ts); + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; } event = ftrace_find_event(entry->type); @@ -2717,9 +2714,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) return ret; } - SEQ_PUT_FIELD_RET(s, newline); + SEQ_PUT_FIELD(s, newline); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static enum print_line_t print_bin_fmt(struct trace_iterator *iter) @@ -2731,9 +2728,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) entry = iter->ent; if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, iter->cpu); - SEQ_PUT_FIELD_RET(s, iter->ts); + SEQ_PUT_FIELD(s, entry->pid); + 
SEQ_PUT_FIELD(s, iter->cpu); + SEQ_PUT_FIELD(s, iter->ts); + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; } event = ftrace_find_event(entry->type); @@ -2779,10 +2778,12 @@ enum print_line_t print_trace_line(struct trace_iterator *iter) { enum print_line_t ret; - if (iter->lost_events && - !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", - iter->cpu, iter->lost_events)) - return TRACE_TYPE_PARTIAL_LINE; + if (iter->lost_events) { + trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", + iter->cpu, iter->lost_events); + if (trace_seq_has_overflowed(&iter->seq)) + return TRACE_TYPE_PARTIAL_LINE; + } if (iter->trace && iter->trace->print_line) { ret = iter->trace->print_line(iter); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3376de623ea0..19418221b302 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_FTRACE_SYSCALLS #include /* For NR_SYSCALLS */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cfa91de22e27..163c11b6b8ff 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -25,15 +25,12 @@ enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; struct bputs_entry *field; - int ret; trace_assign_type(field, entry); - ret = trace_seq_puts(s, field->str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, field->str); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) @@ -41,15 +38,12 @@ enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; struct bprint_entry *field; - int ret; trace_assign_type(field, entry); - ret = trace_seq_bprintf(s, field->fmt, field->buf); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_bprintf(s, field->fmt, field->buf); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) @@ -57,15 +51,12 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; struct print_entry *field; - int ret; trace_assign_type(field, entry); - ret = trace_seq_puts(s, field->buf); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, field->buf); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } const char * @@ -193,7 +184,6 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, struct trace_seq *s = &iter->seq; struct trace_seq *p = &iter->tmp_seq; struct trace_entry *entry; - int ret; event = container_of(trace_event, struct ftrace_event_call, event); entry = iter->ent; @@ -204,8 +194,9 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, } trace_seq_init(p); - ret = trace_seq_printf(s, "%s: ", ftrace_event_name(event)); - if (!ret) + trace_seq_printf(s, "%s: ", ftrace_event_name(event)); + + if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; return 0; @@ -216,18 +207,11 @@ static int ftrace_output_raw(struct trace_iterator *iter, char *name, char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; - int ret; - ret = trace_seq_printf(s, "%s: ", name); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, "%s: ", name); + trace_seq_vprintf(s, fmt, ap); - ret = 
trace_seq_vprintf(s, fmt, ap); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) @@ -260,7 +244,7 @@ static inline const char *kretprobed(const char *name) } #endif /* CONFIG_KRETPROBES */ -static int +static void seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS @@ -271,12 +255,11 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) name = kretprobed(str); - return trace_seq_printf(s, fmt, name); + trace_seq_printf(s, fmt, name); #endif - return 1; } -static int +static void seq_print_sym_offset(struct trace_seq *s, const char *fmt, unsigned long address) { @@ -287,9 +270,8 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, sprint_symbol(str, address); name = kretprobed(str); - return trace_seq_printf(s, fmt, name); + trace_seq_printf(s, fmt, name); #endif - return 1; } #ifndef CONFIG_64BIT @@ -320,14 +302,14 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, if (file) { ret = trace_seq_path(s, &file->f_path); if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", - ip - vmstart); + trace_seq_printf(s, "[+0x%lx]", + ip - vmstart); } up_read(&mm->mmap_sem); } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); - return ret; + trace_seq_printf(s, " <" IP_FMT ">", ip); + return !trace_seq_has_overflowed(s); } int @@ -335,7 +317,6 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, unsigned long sym_flags) { struct mm_struct *mm = NULL; - int ret = 1; unsigned int i; if (trace_flags & TRACE_ITER_SYM_USEROBJ) { @@ -354,48 +335,45 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { unsigned long ip = entry->caller[i]; - if (ip == ULONG_MAX || !ret) + if (ip == ULONG_MAX || trace_seq_has_overflowed(s)) break; - if (ret) - ret = trace_seq_puts(s, " => "); + + trace_seq_puts(s, " => "); + if (!ip) { - if (ret) - ret = trace_seq_puts(s, "??"); - if (ret) - ret = trace_seq_putc(s, '\n'); + trace_seq_puts(s, "??"); + trace_seq_putc(s, '\n'); continue; } - if (!ret) - break; - if (ret) - ret = seq_print_user_ip(s, mm, ip, sym_flags); - ret = trace_seq_putc(s, '\n'); + + seq_print_user_ip(s, mm, ip, sym_flags); + trace_seq_putc(s, '\n'); } if (mm) mmput(mm); - return ret; + + return !trace_seq_has_overflowed(s); } int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { - int ret; - - if (!ip) - return trace_seq_putc(s, '0'); + if (!ip) { + trace_seq_putc(s, '0'); + goto out; + } if (sym_flags & TRACE_ITER_SYM_OFFSET) - ret = seq_print_sym_offset(s, "%s", ip); + seq_print_sym_offset(s, "%s", ip); else - ret = seq_print_sym_short(s, "%s", ip); - - if (!ret) - return 0; + seq_print_sym_short(s, "%s", ip); if (sym_flags & TRACE_ITER_SYM_ADDR) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); - return ret; + trace_seq_printf(s, " <" IP_FMT ">", ip); + + out: + return !trace_seq_has_overflowed(s); } /** @@ -413,7 +391,6 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) char irqs_off; int hardirq; int softirq; - int ret; hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; @@ -445,16 +422,15 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) softirq ? 
's' : '.'; - if (!trace_seq_printf(s, "%c%c%c", - irqs_off, need_resched, hardsoft_irq)) - return 0; + trace_seq_printf(s, "%c%c%c", + irqs_off, need_resched, hardsoft_irq); if (entry->preempt_count) - ret = trace_seq_printf(s, "%x", entry->preempt_count); + trace_seq_printf(s, "%x", entry->preempt_count); else - ret = trace_seq_putc(s, '.'); + trace_seq_putc(s, '.'); - return ret; + return !trace_seq_has_overflowed(s); } static int @@ -464,9 +440,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) trace_find_cmdline(entry->pid, comm); - if (!trace_seq_printf(s, "%8.8s-%-5d %3d", - comm, entry->pid, cpu)) - return 0; + trace_seq_printf(s, "%8.8s-%-5d %3d", + comm, entry->pid, cpu); return trace_print_lat_fmt(s, entry); } @@ -493,24 +468,29 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC); unsigned long rel_msec = (unsigned long)rel_ts; - return trace_seq_printf( - s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ", - ns2usecs(iter->ts), - abs_msec, abs_usec, - rel_msec, rel_usec); + trace_seq_printf( + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ", + ns2usecs(iter->ts), + abs_msec, abs_usec, + rel_msec, rel_usec); + } else if (verbose && !in_ns) { - return trace_seq_printf( - s, "[%016llx] %lld (+%lld): ", - iter->ts, abs_ts, rel_ts); + trace_seq_printf( + s, "[%016llx] %lld (+%lld): ", + iter->ts, abs_ts, rel_ts); + } else if (!verbose && in_ns) { - return trace_seq_printf( - s, " %4lldus%c: ", - abs_ts, - rel_ts > preempt_mark_thresh_us ? '!' : - rel_ts > 1 ? '+' : ' '); + trace_seq_printf( + s, " %4lldus%c: ", + abs_ts, + rel_ts > preempt_mark_thresh_us ? '!' : + rel_ts > 1 ? '+' : ' '); + } else { /* !verbose && !in_ns */ - return trace_seq_printf(s, " %4lld: ", abs_ts); + trace_seq_printf(s, " %4lld: ", abs_ts); } + + return !trace_seq_has_overflowed(s); } int trace_print_context(struct trace_iterator *iter) @@ -520,34 +500,29 @@ int trace_print_context(struct trace_iterator *iter) unsigned long long t; unsigned long secs, usec_rem; char comm[TASK_COMM_LEN]; - int ret; trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s-%-5d [%03d] ", + trace_seq_printf(s, "%16s-%-5d [%03d] ", comm, entry->pid, iter->cpu); - if (!ret) - return 0; - if (trace_flags & TRACE_ITER_IRQ_INFO) { - ret = trace_print_lat_fmt(s, entry); - if (!ret) - return 0; - } + if (trace_flags & TRACE_ITER_IRQ_INFO) + trace_print_lat_fmt(s, entry); if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { t = ns2usecs(iter->ts); usec_rem = do_div(t, USEC_PER_SEC); secs = (unsigned long)t; - return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); + trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); } else - return trace_seq_printf(s, " %12llu: ", iter->ts); + trace_seq_printf(s, " %12llu: ", iter->ts); + + return !trace_seq_has_overflowed(s); } int trace_print_lat_context(struct trace_iterator *iter) { u64 next_ts; - int ret; /* trace_find_next_entry will reset ent_size */ int ent_size = iter->ent_size; struct trace_seq *s = &iter->seq; @@ -567,18 +542,17 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf( - s, "%16s %5d %3d %d %08x %08lx ", - comm, entry->pid, iter->cpu, entry->flags, - entry->preempt_count, iter->idx); + trace_seq_printf( + s, "%16s %5d %3d %d %08x %08lx ", + comm, entry->pid, iter->cpu, entry->flags, + entry->preempt_count, iter->idx); } else { - ret = lat_print_generic(s, entry, iter->cpu); + lat_print_generic(s, entry, 
iter->cpu); } - if (ret) - ret = lat_print_timestamp(iter, next_ts); + lat_print_timestamp(iter, next_ts); - return ret; + return !trace_seq_has_overflowed(s); } static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; @@ -764,10 +738,9 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event); enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, struct trace_event *event) { - if (!trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type)) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type); - return TRACE_TYPE_HANDLED; + return trace_handle_return(&iter->seq); } /* TRACE_FN */ @@ -779,24 +752,16 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; + seq_print_ip_sym(s, field->ip, flags); if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { - if (!trace_seq_puts(s, " <-")) - goto partial; - if (!seq_print_ip_sym(s, - field->parent_ip, - flags)) - goto partial; + trace_seq_puts(s, " <-"); + seq_print_ip_sym(s, field->parent_ip, flags); } - if (!trace_seq_putc(s, '\n')) - goto partial; - return TRACE_TYPE_HANDLED; + trace_seq_putc(s, '\n'); - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags, @@ -806,12 +771,11 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - if (!trace_seq_printf(&iter->seq, "%lx %lx\n", - field->ip, - field->parent_ip)) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(&iter->seq, "%lx %lx\n", + field->ip, + field->parent_ip); - return TRACE_TYPE_HANDLED; + return trace_handle_return(&iter->seq); } static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags, @@ -822,10 +786,10 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - SEQ_PUT_HEX_FIELD_RET(s, field->ip); - SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); + SEQ_PUT_HEX_FIELD(s, field->ip); + SEQ_PUT_HEX_FIELD(s, field->parent_ip); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags, @@ -836,10 +800,10 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - SEQ_PUT_FIELD_RET(s, field->ip); - SEQ_PUT_FIELD_RET(s, field->parent_ip); + SEQ_PUT_FIELD(s, field->ip); + SEQ_PUT_FIELD(s, field->parent_ip); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static struct trace_event_functions trace_fn_funcs = { @@ -868,18 +832,17 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, T = task_state_char(field->next_state); S = task_state_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); - if (!trace_seq_printf(&iter->seq, - " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", - field->prev_pid, - field->prev_prio, - S, delim, - field->next_cpu, - field->next_pid, - field->next_prio, - T, comm)) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(&iter->seq, + " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", + field->prev_pid, + field->prev_prio, + S, delim, + field->next_cpu, + field->next_pid, + field->next_prio, + T, comm); - return TRACE_TYPE_HANDLED; + return trace_handle_return(&iter->seq); } static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags, @@ -904,17 +867,16 
@@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) if (!S) S = task_state_char(field->prev_state); T = task_state_char(field->next_state); - if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", - field->prev_pid, - field->prev_prio, - S, - field->next_cpu, - field->next_pid, - field->next_prio, - T)) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", + field->prev_pid, + field->prev_prio, + S, + field->next_cpu, + field->next_pid, + field->next_prio, + T); - return TRACE_TYPE_HANDLED; + return trace_handle_return(&iter->seq); } static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags, @@ -942,15 +904,15 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) S = task_state_char(field->prev_state); T = task_state_char(field->next_state); - SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); - SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); - SEQ_PUT_HEX_FIELD_RET(s, S); - SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu); - SEQ_PUT_HEX_FIELD_RET(s, field->next_pid); - SEQ_PUT_HEX_FIELD_RET(s, field->next_prio); - SEQ_PUT_HEX_FIELD_RET(s, T); + SEQ_PUT_HEX_FIELD(s, field->prev_pid); + SEQ_PUT_HEX_FIELD(s, field->prev_prio); + SEQ_PUT_HEX_FIELD(s, S); + SEQ_PUT_HEX_FIELD(s, field->next_cpu); + SEQ_PUT_HEX_FIELD(s, field->next_pid); + SEQ_PUT_HEX_FIELD(s, field->next_prio); + SEQ_PUT_HEX_FIELD(s, T); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags, @@ -973,15 +935,15 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - SEQ_PUT_FIELD_RET(s, field->prev_pid); - SEQ_PUT_FIELD_RET(s, field->prev_prio); - SEQ_PUT_FIELD_RET(s, field->prev_state); - SEQ_PUT_FIELD_RET(s, field->next_cpu); - SEQ_PUT_FIELD_RET(s, field->next_pid); - SEQ_PUT_FIELD_RET(s, field->next_prio); - SEQ_PUT_FIELD_RET(s, field->next_state); + SEQ_PUT_FIELD(s, field->prev_pid); + SEQ_PUT_FIELD(s, field->prev_prio); + SEQ_PUT_FIELD(s, field->prev_state); + SEQ_PUT_FIELD(s, field->next_cpu); + SEQ_PUT_FIELD(s, field->next_pid); + SEQ_PUT_FIELD(s, field->next_prio); + SEQ_PUT_FIELD(s, field->next_state); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static struct trace_event_functions trace_ctx_funcs = { @@ -1021,23 +983,19 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); end = (unsigned long *)((long)iter->ent + iter->ent_size); - if (!trace_seq_puts(s, "\n")) - goto partial; + trace_seq_puts(s, "\n"); for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) { - if (!trace_seq_puts(s, " => ")) - goto partial; - if (!seq_print_ip_sym(s, *p, flags)) - goto partial; - if (!trace_seq_putc(s, '\n')) - goto partial; + if (trace_seq_has_overflowed(s)) + break; + + trace_seq_puts(s, " => "); + seq_print_ip_sym(s, *p, flags); + trace_seq_putc(s, '\n'); } - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } static struct trace_event_functions trace_stack_funcs = { @@ -1058,16 +1016,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - if (!trace_seq_puts(s, "\n")) - goto partial; + trace_seq_puts(s, "\n"); + seq_print_userip_objs(field, s, flags); - if (!seq_print_userip_objs(field, s, flags)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return 
trace_handle_return(s); } static struct trace_event_functions trace_user_stack_funcs = { @@ -1090,19 +1042,11 @@ trace_bputs_print(struct trace_iterator *iter, int flags, trace_assign_type(field, entry); - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; + seq_print_ip_sym(s, field->ip, flags); + trace_seq_puts(s, ": "); + trace_seq_puts(s, field->str); - if (!trace_seq_puts(s, ": ")) - goto partial; - - if (!trace_seq_puts(s, field->str)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } @@ -1115,16 +1059,10 @@ trace_bputs_raw(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - if (!trace_seq_printf(s, ": %lx : ", field->ip)) - goto partial; + trace_seq_printf(s, ": %lx : ", field->ip); + trace_seq_puts(s, field->str); - if (!trace_seq_puts(s, field->str)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } static struct trace_event_functions trace_bputs_funcs = { @@ -1148,19 +1086,11 @@ trace_bprint_print(struct trace_iterator *iter, int flags, trace_assign_type(field, entry); - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; + seq_print_ip_sym(s, field->ip, flags); + trace_seq_puts(s, ": "); + trace_seq_bprintf(s, field->fmt, field->buf); - if (!trace_seq_puts(s, ": ")) - goto partial; - - if (!trace_seq_bprintf(s, field->fmt, field->buf)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } @@ -1173,16 +1103,10 @@ trace_bprint_raw(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - if (!trace_seq_printf(s, ": %lx : ", field->ip)) - goto partial; + trace_seq_printf(s, ": %lx : ", field->ip); + trace_seq_bprintf(s, field->fmt, field->buf); - if (!trace_seq_bprintf(s, field->fmt, field->buf)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } static struct trace_event_functions trace_bprint_funcs = { @@ -1204,16 +1128,10 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; + seq_print_ip_sym(s, field->ip, flags); + trace_seq_printf(s, ": %s", field->buf); - if (!trace_seq_printf(s, ": %s", field->buf)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(s); } static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, @@ -1223,13 +1141,9 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf)) - goto partial; + trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf); - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; + return trace_handle_return(&iter->seq); } static struct trace_event_functions trace_print_funcs = { diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 80b25b585a70..8ef2c40efb3c 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -35,17 +35,11 @@ trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); extern int __unregister_ftrace_event(struct trace_event *event); extern struct rw_semaphore trace_event_sem; -#define SEQ_PUT_FIELD_RET(s, x) \ -do { \ - if 
(!trace_seq_putmem(s, &(x), sizeof(x))) \ - return TRACE_TYPE_PARTIAL_LINE; \ -} while (0) +#define SEQ_PUT_FIELD(s, x) \ + trace_seq_putmem(s, &(x), sizeof(x)) -#define SEQ_PUT_HEX_FIELD_RET(s, x) \ -do { \ - if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ - return TRACE_TYPE_PARTIAL_LINE; \ -} while (0) +#define SEQ_PUT_HEX_FIELD(s, x) \ + trace_seq_putmem_hex(s, &(x), sizeof(x)) #endif From f4a1d08ce65d7156504f2f0eac26f47dfc9120e6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 11:35:48 -0500 Subject: [PATCH 25/55] blktrace/tracing: Use trace_seq_has_overflowed() helper function Checking the return code of every trace_seq_printf() operation and having to return early if it overflowed makes the code messy. Using the new trace_seq_has_overflowed() and trace_handle_return() functions allows us to clean up the code. In the future, trace_seq_printf() and friends will be turning into void functions and not returning a value. The trace_seq_has_overflowed() is to be used instead. This cleanup allows that change to take place. Cc: Jens Axboe Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/blktrace.c | 148 ++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 82 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c1bd4ada2a04..11b9cb36092b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1142,9 +1142,9 @@ static void get_pdu_remap(const struct trace_entry *ent, r->sector_from = be64_to_cpu(sector_from); } -typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); +typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act); -static int blk_log_action_classic(struct trace_iterator *iter, const char *act) +static void blk_log_action_classic(struct trace_iterator *iter, const char *act) { char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; @@ -1154,33 +1154,33 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) fill_rwbs(rwbs, t); - return trace_seq_printf(&iter->seq, - "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", - MAJOR(t->device), MINOR(t->device), iter->cpu, - secs, nsec_rem, iter->ent->pid, act, rwbs); + trace_seq_printf(&iter->seq, + "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", + MAJOR(t->device), MINOR(t->device), iter->cpu, + secs, nsec_rem, iter->ent->pid, act, rwbs); } -static int blk_log_action(struct trace_iterator *iter, const char *act) +static void blk_log_action(struct trace_iterator *iter, const char *act) { char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); - return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", - MAJOR(t->device), MINOR(t->device), act, rwbs); + trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", + MAJOR(t->device), MINOR(t->device), act, rwbs); } -static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) { const unsigned char *pdu_buf; int pdu_len; - int i, end, ret; + int i, end; pdu_buf = pdu_start(ent); pdu_len = te_blk_io_trace(ent)->pdu_len; if (!pdu_len) - return 1; + return; /* find the last zero that needs to be printed */ for (end = pdu_len - 1; end >= 0; end--) @@ -1188,119 +1188,107 @@ static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) break; end++; - if (!trace_seq_putc(s, '(')) - return 0; + trace_seq_putc(s, '('); for (i = 0; i < pdu_len; i++) { - ret = trace_seq_printf(s, 
"%s%02x", - i == 0 ? "" : " ", pdu_buf[i]); - if (!ret) - return ret; + trace_seq_printf(s, "%s%02x", + i == 0 ? "" : " ", pdu_buf[i]); /* * stop when the rest is just zeroes and indicate so * with a ".." appended */ - if (i == end && end != pdu_len - 1) - return trace_seq_puts(s, " ..) "); + if (i == end && end != pdu_len - 1) { + trace_seq_puts(s, " ..) "); + return; + } } - return trace_seq_puts(s, ") "); + trace_seq_puts(s, ") "); } -static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { - int ret; - - ret = trace_seq_printf(s, "%u ", t_bytes(ent)); - if (!ret) - return 0; - ret = blk_log_dump_pdu(s, ent); - if (!ret) - return 0; - return trace_seq_printf(s, "[%s]\n", cmd); + trace_seq_printf(s, "%u ", t_bytes(ent)); + blk_log_dump_pdu(s, ent); + trace_seq_printf(s, "[%s]\n", cmd); } else { if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%s]\n", + trace_seq_printf(s, "%llu + %u [%s]\n", t_sector(ent), t_sec(ent), cmd); - return trace_seq_printf(s, "[%s]\n", cmd); + else + trace_seq_printf(s, "[%s]\n", cmd); } } -static int blk_log_with_error(struct trace_seq *s, +static void blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent) { if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { - int ret; - - ret = blk_log_dump_pdu(s, ent); - if (ret) - return trace_seq_printf(s, "[%d]\n", t_error(ent)); - return 0; + blk_log_dump_pdu(s, ent); + trace_seq_printf(s, "[%d]\n", t_error(ent)); } else { if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%d]\n", - t_sector(ent), - t_sec(ent), t_error(ent)); - return trace_seq_printf(s, "%llu [%d]\n", - t_sector(ent), t_error(ent)); + trace_seq_printf(s, "%llu + %u [%d]\n", + t_sector(ent), + t_sec(ent), t_error(ent)); + else + trace_seq_printf(s, "%llu [%d]\n", + t_sector(ent), t_error(ent)); } } -static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) { struct blk_io_trace_remap r = { .device_from = 0, }; get_pdu_remap(ent, &r); - return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", - t_sector(ent), t_sec(ent), - MAJOR(r.device_from), MINOR(r.device_from), - (unsigned long long)r.sector_from); + trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", + t_sector(ent), t_sec(ent), + MAJOR(r.device_from), MINOR(r.device_from), + (unsigned long long)r.sector_from); } -static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - return trace_seq_printf(s, "[%s]\n", cmd); + trace_seq_printf(s, "[%s]\n", cmd); } -static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); + trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); } -static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), - get_pdu_int(ent), cmd); + 
trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), + get_pdu_int(ent), cmd); } -static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) +static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) { - int ret; const struct blk_io_trace *t = te_blk_io_trace(ent); - ret = trace_seq_putmem(s, t + 1, t->pdu_len); - if (ret) - return trace_seq_putc(s, '\n'); - return ret; + trace_seq_putmem(s, t + 1, t->pdu_len); + trace_seq_putc(s, '\n'); } /* @@ -1339,7 +1327,7 @@ static void blk_tracer_reset(struct trace_array *tr) static const struct { const char *act[2]; - int (*print)(struct trace_seq *s, const struct trace_entry *ent); + void (*print)(struct trace_seq *s, const struct trace_entry *ent); } what2act[] = { [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, @@ -1364,7 +1352,6 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, struct trace_seq *s = &iter->seq; const struct blk_io_trace *t; u16 what; - int ret; bool long_act; blk_log_action_t *log_action; @@ -1374,21 +1361,18 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, log_action = classic ? &blk_log_action_classic : &blk_log_action; if (t->action == BLK_TN_MESSAGE) { - ret = log_action(iter, long_act ? "message" : "m"); - if (ret) - ret = blk_log_msg(s, iter->ent); - goto out; + log_action(iter, long_act ? "message" : "m"); + blk_log_msg(s, iter->ent); } if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) - ret = trace_seq_printf(s, "Unknown action %x\n", what); + trace_seq_printf(s, "Unknown action %x\n", what); else { - ret = log_action(iter, what2act[what].act[long_act]); - if (ret) - ret = what2act[what].print(s, iter->ent); + log_action(iter, what2act[what].act[long_act]); + what2act[what].print(s, iter->ent); } -out: - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + + return trace_handle_return(s); } static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, @@ -1397,7 +1381,7 @@ static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, return print_one_line(iter, false); } -static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) +static void blk_trace_synthesize_old_trace(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; @@ -1407,18 +1391,18 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) .time = iter->ts, }; - if (!trace_seq_putmem(s, &old, offset)) - return 0; - return trace_seq_putmem(s, &t->sector, - sizeof(old) - offset + t->pdu_len); + trace_seq_putmem(s, &old, offset); + trace_seq_putmem(s, &t->sector, + sizeof(old) - offset + t->pdu_len); } static enum print_line_t blk_trace_event_print_binary(struct trace_iterator *iter, int flags, struct trace_event *event) { - return blk_trace_synthesize_old_trace(iter) ? - TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + blk_trace_synthesize_old_trace(iter); + + return trace_handle_return(&iter->seq); } static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) From c0cd93aa1640a48038bacbee093695f892ea0130 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 11:49:00 -0500 Subject: [PATCH 26/55] ring-buffer: Remove check of trace_seq_{puts,printf}() return values Remove checking the return value of all trace_seq_puts(). It was wrong anyway as only the last return value mattered. 
But as the trace_seq_puts() is going to be a void function in the future, we should not be checking the return value of it anyway. Just return !trace_seq_has_overflowed() instead. Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 67 ++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2d75c94ae87d..a28bdd17c853 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -34,21 +34,19 @@ static void update_pages_handler(struct work_struct *work); */ int ring_buffer_print_entry_header(struct trace_seq *s) { - int ret; + trace_seq_puts(s, "# compressed entry header\n"); + trace_seq_puts(s, "\ttype_len : 5 bits\n"); + trace_seq_puts(s, "\ttime_delta : 27 bits\n"); + trace_seq_puts(s, "\tarray : 32 bits\n"); + trace_seq_putc(s, '\n'); + trace_seq_printf(s, "\tpadding : type == %d\n", + RINGBUF_TYPE_PADDING); + trace_seq_printf(s, "\ttime_extend : type == %d\n", + RINGBUF_TYPE_TIME_EXTEND); + trace_seq_printf(s, "\tdata max type_len == %d\n", + RINGBUF_TYPE_DATA_TYPE_LEN_MAX); - ret = trace_seq_puts(s, "# compressed entry header\n"); - ret = trace_seq_puts(s, "\ttype_len : 5 bits\n"); - ret = trace_seq_puts(s, "\ttime_delta : 27 bits\n"); - ret = trace_seq_puts(s, "\tarray : 32 bits\n"); - ret = trace_seq_putc(s, '\n'); - ret = trace_seq_printf(s, "\tpadding : type == %d\n", - RINGBUF_TYPE_PADDING); - ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", - RINGBUF_TYPE_TIME_EXTEND); - ret = trace_seq_printf(s, "\tdata max type_len == %d\n", - RINGBUF_TYPE_DATA_TYPE_LEN_MAX); - - return ret; + return !trace_seq_has_overflowed(s); } /* @@ -419,32 +417,31 @@ static inline int test_time_stamp(u64 delta) int ring_buffer_print_page_header(struct trace_seq *s) { struct buffer_data_page field; - int ret; - ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\tsigned:%u;\n", - (unsigned int)sizeof(field.time_stamp), - (unsigned int)is_signed_type(u64)); + trace_seq_printf(s, "\tfield: u64 timestamp;\t" + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); - ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit), - (unsigned int)is_signed_type(long)); + trace_seq_printf(s, "\tfield: local_t commit;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); - ret = trace_seq_printf(s, "\tfield: int overwrite;\t" - "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), commit), - 1, - (unsigned int)is_signed_type(long)); + trace_seq_printf(s, "\tfield: int overwrite;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + 1, + (unsigned int)is_signed_type(long)); - ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE, - (unsigned int)is_signed_type(char)); + trace_seq_printf(s, "\tfield: char data;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), data), + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); - return ret; + return !trace_seq_has_overflowed(s); } struct rb_irq_work { From 
7d40f67165d9a4c9add5c0f7e599637a292bbfb2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 13:19:06 -0500 Subject: [PATCH 27/55] tracing: Have branch tracer use trace_handle_return() helper function The branch tracer should not be checking the trace_seq_printf() return value as that will soon be void. There's a new trace_handle_return() helper function that will return TRACE_TYPE_PARTIAL_LINE if the trace_seq overflowed and TRACE_TYPE_HANDLED otherwise. Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/trace_branch.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index a3916f68a1c7..7d6e2afde669 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -151,14 +151,13 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n", - field->correct ? " ok " : " MISS ", - field->func, - field->file, - field->line)) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n", + field->correct ? " ok " : " MISS ", + field->func, + field->file, + field->line); - return TRACE_TYPE_HANDLED; + return trace_handle_return(&iter->seq); } static void branch_print_header(struct seq_file *s) From 9d9add34ec7b2cdd438b0b26481f8d1861bde45c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 14:57:38 -0500 Subject: [PATCH 28/55] tracing: Have function_graph use trace_seq_has_overflowed() Doing individual checks all over the place makes the code very messy. Instead, just check trace_seq_has_overflowed() at the end or in strategic places. This makes the code much cleaner and also helps with getting closer to removing the return values of trace_seq_printf() and friends.
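To illustrate the resulting pattern (a minimal sketch with made-up output, not code taken from this patch), an event print callback can now issue all of its writes unconditionally and make a single decision at the end:

    static enum print_line_t print_example(struct trace_iterator *iter)
    {
            struct trace_seq *s = &iter->seq;

            /* No per-call checks: once the buffer is full, writes become no-ops. */
            trace_seq_printf(s, "%d) ", iter->cpu);
            trace_seq_puts(s, "example\n");

            /* One test at the end replaces a test after every write. */
            return trace_seq_has_overflowed(s) ?
                    TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
    }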
Link: http://lkml.kernel.org/r/20141114011410.987913836@goodmis.org Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 2 +- kernel/trace/trace_functions_graph.c | 382 ++++++++------------------- 2 files changed, 118 insertions(+), 266 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 19418221b302..c3a37e55ec8b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -726,7 +726,7 @@ extern unsigned long trace_flags; extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); -extern enum print_line_t +extern void trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 55bdf9d7f714..100288d10e1f 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -107,7 +107,7 @@ enum { FLAGS_FILL_END = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT, }; -static enum print_line_t +static void print_graph_duration(unsigned long long duration, struct trace_seq *s, u32 flags); @@ -483,33 +483,24 @@ static int graph_trace_update_thresh(struct trace_array *tr) static int max_bytes_for_cpu; -static enum print_line_t -print_graph_cpu(struct trace_seq *s, int cpu) +static void print_graph_cpu(struct trace_seq *s, int cpu) { - int ret; - /* * Start with a space character - to make it stand out * to the right a bit when trace output is pasted into * email: */ - ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; + trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu); } #define TRACE_GRAPH_PROCINFO_LENGTH 14 -static enum print_line_t -print_graph_proc(struct trace_seq *s, pid_t pid) +static void print_graph_proc(struct trace_seq *s, pid_t pid) { char comm[TASK_COMM_LEN]; /* sign + log10(MAX_INT) + '\0' */ char pid_str[11]; int spaces = 0; - int ret; int len; int i; @@ -524,56 +515,43 @@ print_graph_proc(struct trace_seq *s, pid_t pid) spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; /* First spaces to align center */ - for (i = 0; i < spaces / 2; i++) { - ret = trace_seq_putc(s, ' '); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + for (i = 0; i < spaces / 2; i++) + trace_seq_putc(s, ' '); - ret = trace_seq_printf(s, "%s-%s", comm, pid_str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, "%s-%s", comm, pid_str); /* Last spaces to align center */ - for (i = 0; i < spaces - (spaces / 2); i++) { - ret = trace_seq_putc(s, ' '); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - return TRACE_TYPE_HANDLED; + for (i = 0; i < spaces - (spaces / 2); i++) + trace_seq_putc(s, ' '); } -static enum print_line_t -print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) +static void print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { - if (!trace_seq_putc(s, ' ')) - return 0; - - return trace_print_lat_fmt(s, entry); + trace_seq_putc(s, ' '); + trace_print_lat_fmt(s, entry); } /* If the pid changed since the last trace, output this event */ -static enum print_line_t +static void verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) { pid_t prev_pid; pid_t *last_pid; - int ret; if (!data) - return TRACE_TYPE_HANDLED; + return; last_pid = 
&(per_cpu_ptr(data->cpu_data, cpu)->last_pid); if (*last_pid == pid) - return TRACE_TYPE_HANDLED; + return; prev_pid = *last_pid; *last_pid = pid; if (prev_pid == -1) - return TRACE_TYPE_HANDLED; + return; /* * Context-switch trace line: @@ -582,33 +560,12 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) ------------------------------------------ */ - ret = trace_seq_puts(s, - " ------------------------------------------\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = print_graph_cpu(s, cpu); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - ret = print_graph_proc(s, prev_pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_puts(s, " => "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = print_graph_proc(s, pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_puts(s, - "\n ------------------------------------------\n\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; + trace_seq_puts(s, " ------------------------------------------\n"); + print_graph_cpu(s, cpu); + print_graph_proc(s, prev_pid); + trace_seq_puts(s, " => "); + print_graph_proc(s, pid); + trace_seq_puts(s, "\n ------------------------------------------\n\n"); } static struct ftrace_graph_ret_entry * @@ -682,103 +639,74 @@ get_return_for_leaf(struct trace_iterator *iter, return next; } -static int print_graph_abs_time(u64 t, struct trace_seq *s) +static void print_graph_abs_time(u64 t, struct trace_seq *s) { unsigned long usecs_rem; usecs_rem = do_div(t, NSEC_PER_SEC); usecs_rem /= 1000; - return trace_seq_printf(s, "%5lu.%06lu | ", - (unsigned long)t, usecs_rem); + trace_seq_printf(s, "%5lu.%06lu | ", + (unsigned long)t, usecs_rem); } -static enum print_line_t +static void print_graph_irq(struct trace_iterator *iter, unsigned long addr, enum trace_type type, int cpu, pid_t pid, u32 flags) { - int ret; struct trace_seq *s = &iter->seq; struct trace_entry *ent = iter->ent; if (addr < (unsigned long)__irqentry_text_start || addr >= (unsigned long)__irqentry_text_end) - return TRACE_TYPE_UNHANDLED; + return; if (trace_flags & TRACE_ITER_CONTEXT_INFO) { /* Absolute time */ - if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { - ret = print_graph_abs_time(iter->ts, s); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) + print_graph_abs_time(iter->ts, s); /* Cpu */ - if (flags & TRACE_GRAPH_PRINT_CPU) { - ret = print_graph_cpu(s, cpu); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } + if (flags & TRACE_GRAPH_PRINT_CPU) + print_graph_cpu(s, cpu); /* Proc */ if (flags & TRACE_GRAPH_PRINT_PROC) { - ret = print_graph_proc(s, pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_puts(s, " | "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_proc(s, pid); + trace_seq_puts(s, " | "); } /* Latency format */ - if (trace_flags & TRACE_ITER_LATENCY_FMT) { - ret = print_graph_lat_fmt(s, ent); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } - + if (trace_flags & TRACE_ITER_LATENCY_FMT) + print_graph_lat_fmt(s, ent); } /* No overhead */ - ret = print_graph_duration(0, s, flags | FLAGS_FILL_START); - if (ret != TRACE_TYPE_HANDLED) - return ret; + print_graph_duration(0, s, flags | FLAGS_FILL_START); if (type == TRACE_GRAPH_ENT) - ret = trace_seq_puts(s, "==========>"); + trace_seq_puts(s, "==========>"); else - 
ret = trace_seq_puts(s, "<=========="); + trace_seq_puts(s, "<=========="); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = print_graph_duration(0, s, flags | FLAGS_FILL_END); - if (ret != TRACE_TYPE_HANDLED) - return ret; - - ret = trace_seq_putc(s, '\n'); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; + print_graph_duration(0, s, flags | FLAGS_FILL_END); + trace_seq_putc(s, '\n'); } -enum print_line_t +void trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) { unsigned long nsecs_rem = do_div(duration, 1000); /* log10(ULONG_MAX) + '\0' */ char usecs_str[21]; char nsecs_str[5]; - int ret, len; + int len; int i; sprintf(usecs_str, "%lu", (unsigned long) duration); /* Print msecs */ - ret = trace_seq_printf(s, "%s", usecs_str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, "%s", usecs_str); len = strlen(usecs_str); @@ -787,79 +715,63 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len); snprintf(nsecs_str, slen, "%03lu", nsecs_rem); - ret = trace_seq_printf(s, ".%s", nsecs_str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, ".%s", nsecs_str); len += strlen(nsecs_str); } - ret = trace_seq_puts(s, " us "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, " us "); /* Print remaining spaces to fit the row's width */ - for (i = len; i < 7; i++) { - ret = trace_seq_putc(s, ' '); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - return TRACE_TYPE_HANDLED; + for (i = len; i < 7; i++) + trace_seq_putc(s, ' '); } -static enum print_line_t +static void print_graph_duration(unsigned long long duration, struct trace_seq *s, u32 flags) { - int ret = -1; + bool duration_printed = false; if (!(flags & TRACE_GRAPH_PRINT_DURATION) || !(trace_flags & TRACE_ITER_CONTEXT_INFO)) - return TRACE_TYPE_HANDLED; + return; /* No real adata, just filling the column with spaces */ switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) { case FLAGS_FILL_FULL: - ret = trace_seq_puts(s, " | "); - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, " | "); + return; case FLAGS_FILL_START: - ret = trace_seq_puts(s, " "); - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, " "); + return; case FLAGS_FILL_END: - ret = trace_seq_puts(s, " |"); - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, " |"); + return; } /* Signal a overhead of time execution to the output */ if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { /* Duration exceeded 100 usecs */ - if (duration > 100000ULL) - ret = trace_seq_puts(s, "! "); + if (duration > 100000ULL) { + trace_seq_puts(s, "! "); + duration_printed = true; + /* Duration exceeded 10 usecs */ - else if (duration > 10000ULL) - ret = trace_seq_puts(s, "+ "); + } else if (duration > 10000ULL) { + trace_seq_puts(s, "+ "); + duration_printed = true; + } } /* - * The -1 means we either did not exceed the duration tresholds - * or we dont want to print out the overhead. Either way we need - * to fill out the space. + * If we did not exceed the duration thresholds, or we don't want + * to print out the overhead, we still need to fill out the space. 
*/ - if (ret == -1) - ret = trace_seq_puts(s, " "); + if (!duration_printed) + trace_seq_puts(s, " "); - /* Catching here any failure happenned above */ - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_print_graph_duration(duration, s); - if (ret != TRACE_TYPE_HANDLED) - return ret; - - ret = trace_seq_puts(s, "| "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; + trace_print_graph_duration(duration, s); + trace_seq_puts(s, "| "); } /* Case of a leaf function on its call entry */ @@ -873,7 +785,6 @@ print_graph_entry_leaf(struct trace_iterator *iter, struct ftrace_graph_ret *graph_ret; struct ftrace_graph_ent *call; unsigned long long duration; - int ret; int i; graph_ret = &ret_entry->ret; @@ -899,22 +810,15 @@ print_graph_entry_leaf(struct trace_iterator *iter, } /* Overhead and duration */ - ret = print_graph_duration(duration, s, flags); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_duration(duration, s, flags); /* Function */ - for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_putc(s, ' '); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) + trace_seq_putc(s, ' '); - ret = trace_seq_printf(s, "%ps();\n", (void *)call->func); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, "%ps();\n", (void *)call->func); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static enum print_line_t @@ -924,7 +828,6 @@ print_graph_entry_nested(struct trace_iterator *iter, { struct ftrace_graph_ent *call = &entry->graph_ent; struct fgraph_data *data = iter->private; - int ret; int i; if (data) { @@ -940,19 +843,15 @@ print_graph_entry_nested(struct trace_iterator *iter, } /* No time */ - ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL); - if (ret != TRACE_TYPE_HANDLED) - return ret; + print_graph_duration(0, s, flags | FLAGS_FILL_FULL); /* Function */ - for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_putc(s, ' '); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) + trace_seq_putc(s, ' '); - ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func); - if (!ret) + trace_seq_printf(s, "%ps() {\n", (void *)call->func); + + if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; /* @@ -962,62 +861,43 @@ print_graph_entry_nested(struct trace_iterator *iter, return TRACE_TYPE_NO_CONSUME; } -static enum print_line_t +static void print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, int type, unsigned long addr, u32 flags) { struct fgraph_data *data = iter->private; struct trace_entry *ent = iter->ent; int cpu = iter->cpu; - int ret; /* Pid */ - if (verif_pid(s, ent->pid, cpu, data) == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; + verif_pid(s, ent->pid, cpu, data); - if (type) { + if (type) /* Interrupt */ - ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } + print_graph_irq(iter, addr, type, cpu, ent->pid, flags); if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) - return 0; + return; /* Absolute time */ - if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { - ret = print_graph_abs_time(iter->ts, s); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) + print_graph_abs_time(iter->ts, s); /* Cpu */ - if (flags & TRACE_GRAPH_PRINT_CPU) { - ret = print_graph_cpu(s, cpu); - if 
(ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } + if (flags & TRACE_GRAPH_PRINT_CPU) + print_graph_cpu(s, cpu); /* Proc */ if (flags & TRACE_GRAPH_PRINT_PROC) { - ret = print_graph_proc(s, ent->pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_puts(s, " | "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_proc(s, ent->pid); + trace_seq_puts(s, " | "); } /* Latency format */ - if (trace_flags & TRACE_ITER_LATENCY_FMT) { - ret = print_graph_lat_fmt(s, ent); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } + if (trace_flags & TRACE_ITER_LATENCY_FMT) + print_graph_lat_fmt(s, ent); - return 0; + return; } /* @@ -1135,8 +1015,7 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, if (check_irq_entry(iter, flags, call->func, call->depth)) return TRACE_TYPE_HANDLED; - if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags); leaf_ret = get_return_for_leaf(iter, field); if (leaf_ret) @@ -1169,7 +1048,6 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, pid_t pid = ent->pid; int cpu = iter->cpu; int func_match = 1; - int ret; int i; if (check_irq_return(iter, flags, trace->depth)) @@ -1195,20 +1073,14 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } } - if (print_graph_prologue(iter, s, 0, 0, flags)) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_prologue(iter, s, 0, 0, flags); /* Overhead and duration */ - ret = print_graph_duration(duration, s, flags); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_duration(duration, s, flags); /* Closing brace */ - for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_putc(s, ' '); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) + trace_seq_putc(s, ' '); /* * If the return function does not have a matching entry, @@ -1217,30 +1089,20 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, * belongs to, write out the function name. Always do * that if the funcgraph-tail option is enabled. 
*/ - if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) { - ret = trace_seq_puts(s, "}\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } else { - ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) + trace_seq_puts(s, "}\n"); + else + trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func); /* Overrun */ - if (flags & TRACE_GRAPH_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)\n", - trace->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + if (flags & TRACE_GRAPH_PRINT_OVERRUN) + trace_seq_printf(s, " (Overruns: %lu)\n", + trace->overrun); - ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, - cpu, pid, flags); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, + cpu, pid, flags); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static enum print_line_t @@ -1257,26 +1119,18 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, if (data) depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; - if (print_graph_prologue(iter, s, 0, 0, flags)) - return TRACE_TYPE_PARTIAL_LINE; + print_graph_prologue(iter, s, 0, 0, flags); /* No time */ - ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL); - if (ret != TRACE_TYPE_HANDLED) - return ret; + print_graph_duration(0, s, flags | FLAGS_FILL_FULL); /* Indentation */ if (depth > 0) - for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_putc(s, ' '); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) + trace_seq_putc(s, ' '); /* The comment */ - ret = trace_seq_puts(s, "/* "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, "/* "); switch (iter->ent->type) { case TRACE_BPRINT: @@ -1305,11 +1159,9 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, s->len--; } - ret = trace_seq_puts(s, " */\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_puts(s, " */\n"); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } From 85224da0b81a52a58b91feccdcd341856d8eb83d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 15:18:16 -0500 Subject: [PATCH 29/55] kprobes/tracing: Use trace_seq_has_overflowed() for overflow checks Instead of checking the return value of trace_seq_printf() and friends for overflowing of the buffer, use the trace_seq_has_overflowed() helper function. This cleans up the code quite a bit and also takes us a step closer to changing the return values of trace_seq_printf() and friends to void. 
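For reference, the trace_handle_return() helper used throughout this patch is just a one-line mapping from the buffer's overflow state to the print_line_t enum. It amounts to the following (a sketch consistent with the helper's description in patch 27, not a verbatim copy of the header):

    static inline enum print_line_t trace_handle_return(struct trace_seq *s)
    {
            /* PARTIAL_LINE if the trace_seq overflowed, HANDLED otherwise. */
            return trace_seq_has_overflowed(s) ?
                    TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
    }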
Link: http://lkml.kernel.org/r/20141114011411.181812785@goodmis.org Reviewed-by: Srikar Dronamraju Reviewed-by: Petr Mladek Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 42 ++++++++++++++----------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ef7e7f16ed1a..5edb518be345 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1024,27 +1024,22 @@ print_kprobe_event(struct trace_iterator *iter, int flags, field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) - goto partial; + trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) - goto partial; + goto out; - if (!trace_seq_putc(s, ')')) - goto partial; + trace_seq_putc(s, ')'); data = (u8 *)&field[1]; for (i = 0; i < tp->nr_args; i++) if (!tp->args[i].type->print(s, tp->args[i].name, data + tp->args[i].offset, field)) - goto partial; + goto out; - if (!trace_seq_putc(s, '\n')) - goto partial; - - return TRACE_TYPE_HANDLED; -partial: - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_putc(s, '\n'); + out: + return trace_handle_return(s); } static enum print_line_t @@ -1060,33 +1055,28 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) - goto partial; + trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) - goto partial; + goto out; - if (!trace_seq_puts(s, " <- ")) - goto partial; + trace_seq_puts(s, " <- "); if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) - goto partial; + goto out; - if (!trace_seq_putc(s, ')')) - goto partial; + trace_seq_putc(s, ')'); data = (u8 *)&field[1]; for (i = 0; i < tp->nr_args; i++) if (!tp->args[i].type->print(s, tp->args[i].name, data + tp->args[i].offset, field)) - goto partial; + goto out; - if (!trace_seq_putc(s, '\n')) - goto partial; + trace_seq_putc(s, '\n'); - return TRACE_TYPE_HANDLED; -partial: - return TRACE_TYPE_PARTIAL_LINE; + out: + return trace_handle_return(s); } From a72e10afab565385d51e5117d7cffe4a667c1b62 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 16:26:06 -0500 Subject: [PATCH 30/55] tracing: Do not check return values of trace_seq_p*() for mmio tracer The return values for trace_seq_printf() and friends are going to be removed and they will become void functions. The mmio tracer checked their return values, and even did so incorrectly. Some of the functions that returned values were never checked at all. Removing all the checks simplifies the code. Use trace_seq_has_overflowed() and trace_handle_return() where necessary instead.
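The shape of the conversion (a simplified sketch with invented output, not the actual mmio code) is to make each print helper a void function and let the one caller that must produce a print_line_t do the final check:

    static void print_header(struct trace_seq *s, int irq)
    {
            /* Was: ret += trace_seq_printf(...); the writes are now unchecked. */
            trace_seq_printf(s, "PCIDEV irq=%d", irq);
            trace_seq_putc(s, '\n');
    }

    static enum print_line_t print_line(struct trace_iterator *iter)
    {
            print_header(&iter->seq, 0);
            return trace_handle_return(&iter->seq);
    }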
Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/trace_mmiotrace.c | 52 ++++++++++++++-------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 0abd9b863474..7a9ba62e9fef 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -59,17 +59,15 @@ static void mmio_trace_start(struct trace_array *tr) mmio_reset_data(tr); } -static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) +static void mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) { - int ret = 0; int i; resource_size_t start, end; const struct pci_driver *drv = pci_dev_driver(dev); - /* XXX: incomplete checks for trace_seq_printf() return value */ - ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x", - dev->bus->number, dev->devfn, - dev->vendor, dev->device, dev->irq); + trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x", + dev->bus->number, dev->devfn, + dev->vendor, dev->device, dev->irq); /* * XXX: is pci_resource_to_user() appropriate, since we are * supposed to interpret the __ioremap() phys_addr argument based on @@ -77,21 +75,20 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) */ for (i = 0; i < 7; i++) { pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); - ret += trace_seq_printf(s, " %llx", + trace_seq_printf(s, " %llx", (unsigned long long)(start | (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); } for (i = 0; i < 7; i++) { pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); - ret += trace_seq_printf(s, " %llx", + trace_seq_printf(s, " %llx", dev->resource[i].start < dev->resource[i].end ? (unsigned long long)(end - start) + 1 : 0); } if (drv) - ret += trace_seq_printf(s, " %s\n", drv->name); + trace_seq_printf(s, " %s\n", drv->name); else - ret += trace_seq_puts(s, " \n"); - return ret; + trace_seq_puts(s, " \n"); } static void destroy_header_iter(struct header_iter *hiter) @@ -179,28 +176,27 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; - int ret = 1; trace_assign_type(field, entry); rw = &field->rw; switch (rw->opcode) { case MMIO_READ: - ret = trace_seq_printf(s, + trace_seq_printf(s, "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", rw->width, secs, usec_rem, rw->map_id, (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_WRITE: - ret = trace_seq_printf(s, + trace_seq_printf(s, "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", rw->width, secs, usec_rem, rw->map_id, (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_UNKNOWN_OP: - ret = trace_seq_printf(s, + trace_seq_printf(s, "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx," "%02lx 0x%lx %d\n", secs, usec_rem, rw->map_id, @@ -209,12 +205,11 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) (rw->value >> 0) & 0xff, rw->pc, 0); break; default: - ret = trace_seq_puts(s, "rw what?\n"); + trace_seq_puts(s, "rw what?\n"); break; } - if (ret) - return TRACE_TYPE_HANDLED; - return TRACE_TYPE_PARTIAL_LINE; + + return trace_handle_return(s); } static enum print_line_t mmio_print_map(struct trace_iterator *iter) @@ -226,31 +221,29 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter) unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; - int ret; 
trace_assign_type(field, entry); m = &field->map; switch (m->opcode) { case MMIO_PROBE: - ret = trace_seq_printf(s, + trace_seq_printf(s, "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", secs, usec_rem, m->map_id, (unsigned long long)m->phys, m->virt, m->len, 0UL, 0); break; case MMIO_UNPROBE: - ret = trace_seq_printf(s, + trace_seq_printf(s, "UNMAP %u.%06lu %d 0x%lx %d\n", secs, usec_rem, m->map_id, 0UL, 0); break; default: - ret = trace_seq_puts(s, "map what?\n"); + trace_seq_puts(s, "map what?\n"); break; } - if (ret) - return TRACE_TYPE_HANDLED; - return TRACE_TYPE_PARTIAL_LINE; + + return trace_handle_return(s); } static enum print_line_t mmio_print_mark(struct trace_iterator *iter) @@ -262,14 +255,11 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; - int ret; /* The trailing newline must be in the message. */ - ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static enum print_line_t mmio_print_line(struct trace_iterator *iter) From d2b0191a38cb1811fa69f645ff4b37e3a23a956d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 17:19:51 -0500 Subject: [PATCH 31/55] tracing/probes: Do not use return value of trace_seq_printf() The functions trace_seq_printf() and friends will soon not have return values and will be void functions. Use trace_seq_has_overflowed() instead to know if the trace_seq operations succeeded or not. Link: http://lkml.kernel.org/r/20141114011411.530216306@goodmis.org Reviewed-by: Petr Mladek Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_probe.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d4b9fc22cd27..b983b2fd2ca1 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -40,7 +40,8 @@ const char *reserved_field_names[] = { int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ void *data, void *ent) \ { \ - return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ + trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ + return !trace_seq_has_overflowed(s); \ } \ const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \ NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); @@ -61,10 +62,11 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, int len = *(u32 *)data >> 16; if (!len) - return trace_seq_printf(s, " %s=(fault)", name); + trace_seq_printf(s, " %s=(fault)", name); else - return trace_seq_printf(s, " %s=\"%s\"", name, - (const char *)get_loc_data(data, ent)); + trace_seq_printf(s, " %s=\"%s\"", name, - (const char *)get_loc_data(data, ent)); hmm + (const char *)get_loc_data(data, ent)); + return !trace_seq_has_overflowed(s); } NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); From 8579a107a6490511bbe35d6f0be5be5b8b697775 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 17:26:57 -0500 Subject: [PATCH 32/55] tracing/uprobes: Do not use return values of trace_seq_printf() The functions trace_seq_printf() and friends will soon no longer have return values. trace_seq_has_overflowed() and trace_handle_return() should be used instead.
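The only early exits left, here as in the kprobe patch, are for the per-argument print callbacks, which still report a status of their own; everything else writes unconditionally and falls through to trace_handle_return(). A condensed sketch of that shape, reusing the probe_arg fields visible in the diff (the function itself is hypothetical):

    static enum print_line_t print_probe_args(struct trace_seq *s,
                                              struct probe_arg *args, int nr_args,
                                              u8 *data, void *ent)
    {
            int i;

            trace_seq_putc(s, '(');         /* no return check needed */

            for (i = 0; i < nr_args; i++)
                    if (!args[i].type->print(s, args[i].name,
                                             data + args[i].offset, ent))
                            goto out;       /* this helper still reports status */

            trace_seq_putc(s, ')');
     out:
            return trace_handle_return(s);
    }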
Link: http://lkml.kernel.org/r/20141114011411.693008134@goodmis.org Link: http://lkml.kernel.org/r/20141115050602.333705855@goodmis.org Reviewed-by: Masami Hiramatsu Acked-by: Srikar Dronamraju Acked-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e35327c787f7..fd76f8e108ef 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -852,16 +852,14 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e tu = container_of(event, struct trace_uprobe, tp.call.event); if (is_ret_probe(tu)) { - if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", - ftrace_event_name(&tu->tp.call), - entry->vaddr[1], entry->vaddr[0])) - goto partial; + trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", + ftrace_event_name(&tu->tp.call), + entry->vaddr[1], entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, true); } else { - if (!trace_seq_printf(s, "%s: (0x%lx)", - ftrace_event_name(&tu->tp.call), - entry->vaddr[0])) - goto partial; + trace_seq_printf(s, "%s: (0x%lx)", + ftrace_event_name(&tu->tp.call), + entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, false); } @@ -869,14 +867,13 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e struct probe_arg *parg = &tu->tp.args[i]; if (!parg->type->print(s, parg->name, data + parg->offset, entry)) - goto partial; + goto out; } - if (trace_seq_putc(s, '\n')) - return TRACE_TYPE_HANDLED; + trace_seq_putc(s, '\n'); -partial: - return TRACE_TYPE_PARTIAL_LINE; + out: + return trace_handle_return(s); } typedef bool (*filter_func_t)(struct uprobe_consumer *self, From 183742f08c5532c0cd3c3d3fa184a26c092e2157 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 17:41:33 -0500 Subject: [PATCH 33/55] tracing: Do not use return values of trace_seq_printf() in syscall tracing The functions trace_seq_printf() and friends will soon stop returning values and will become void functions. To know if they succeeded or not, the functions trace_seq_has_overflowed() and trace_handle_return() should be used instead.
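One detail worth noting in this patch: when formatting a variable number of arguments, checking trace_seq_has_overflowed() at the top of each loop iteration stops the formatting as soon as the buffer fills, rather than doing wasted work on writes that have become no-ops. A minimal sketch of that loop (the argument arrays are illustrative):

    static void print_sys_args(struct trace_seq *s, const char **names,
                               unsigned long *vals, int nb_args)
    {
            int i;

            for (i = 0; i < nb_args; i++) {
                    /* Bail out early; once full, every further write is a no-op. */
                    if (trace_seq_has_overflowed(s))
                            break;

                    trace_seq_printf(s, "%s: %lx%s", names[i], vals[i],
                                     i == nb_args - 1 ? "" : ", ");
            }
    }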
Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 47 ++++++++++++++--------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 4dc8b79c5f75..a72f3d8d813e 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -114,7 +114,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags, struct trace_entry *ent = iter->ent; struct syscall_trace_enter *trace; struct syscall_metadata *entry; - int i, ret, syscall; + int i, syscall; trace = (typeof(trace))ent; syscall = trace->nr; @@ -128,35 +128,28 @@ goto end; } - ret = trace_seq_printf(s, "%s(", entry->name); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, "%s(", entry->name); for (i = 0; i < entry->nb_args; i++) { + + if (trace_seq_has_overflowed(s)) + goto end; + /* parameter types */ - if (trace_flags & TRACE_ITER_VERBOSE) { - ret = trace_seq_printf(s, "%s ", entry->types[i]); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + if (trace_flags & TRACE_ITER_VERBOSE) + trace_seq_printf(s, "%s ", entry->types[i]); + /* parameter values */ - ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], - trace->args[i], - i == entry->nb_args - 1 ? "" : ", "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_printf(s, "%s: %lx%s", entry->args[i], + trace->args[i], + i == entry->nb_args - 1 ? "" : ", "); } - ret = trace_seq_putc(s, ')'); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - + trace_seq_putc(s, ')'); end: - ret = trace_seq_putc(s, '\n'); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + trace_seq_putc(s, '\n'); - return TRACE_TYPE_HANDLED; + return trace_handle_return(s); } static enum print_line_t @@ -168,7 +161,6 @@ print_syscall_exit(struct trace_iterator *iter, int flags, struct syscall_trace_exit *trace; int syscall; struct syscall_metadata *entry; - int ret; trace = (typeof(trace))ent; syscall = trace->nr; @@ -176,7 +168,7 @@ if (!entry) { trace_seq_putc(s, '\n'); - return TRACE_TYPE_HANDLED; + goto out; } if (entry->exit_event->event.type != ent->type) { @@ -184,12 +176,11 @@ - ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, + trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, trace->ret); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; + out: + return trace_handle_return(s); } extern char *__bad_type_size(void); From dba39448abb7340f86ae9b062f99d7acacb5d2d2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 12 Nov 2014 18:07:22 -0500 Subject: [PATCH 34/55] tracing: Remove return values of most trace_seq_*() functions trace_seq_printf() and friends are used to store strings into a buffer that can be passed around from function to function. If the trace_seq buffer fills up, it will not print any more. The return values were somewhat inconsistent and using trace_seq_has_overflowed() was a better way to know if a write to the trace_seq buffer succeeded or not. Now that all users have stopped reading the return values of the printf() type functions, they can safely return void and keep future users of them from reading the inconsistent values as well.
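For any remaining or future caller, the conversion this enables looks like the following before/after pair (a hypothetical caller, shown only for illustration):

    /* Before: each call site propagated the printf status itself. */
    static enum print_line_t print_pid_old(struct trace_seq *s, int pid)
    {
            if (!trace_seq_printf(s, "pid=%d\n", pid))
                    return TRACE_TYPE_PARTIAL_LINE;
            return TRACE_TYPE_HANDLED;
    }

    /* After: write unconditionally, then test the buffer state exactly once. */
    static enum print_line_t print_pid_new(struct trace_seq *s, int pid)
    {
            trace_seq_printf(s, "pid=%d\n", pid);
            return trace_handle_return(s);
    }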
Link: http://lkml.kernel.org/r/20141114011411.992510720@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 37 +++++++---------- kernel/trace/trace_seq.c | 84 +++++++++++---------------------------- 2 files changed, 38 insertions(+), 83 deletions(-) diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 07eda413dfcf..db8a73224f1a 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -57,40 +57,37 @@ static inline bool trace_seq_has_overflowed(struct trace_seq *s) */ #ifdef CONFIG_TRACING extern __printf(2, 3) -int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +void trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern __printf(2, 0) -int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args); -extern int +void trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args); +extern void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int trace_print_seq(struct seq_file *m, struct trace_seq *s); extern int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt); -extern int trace_seq_puts(struct trace_seq *s, const char *str); -extern int trace_seq_putc(struct trace_seq *s, unsigned char c); -extern int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len); -extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, +extern void trace_seq_puts(struct trace_seq *s, const char *str); +extern void trace_seq_putc(struct trace_seq *s, unsigned char c); +extern void trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len); +extern void trace_seq_putmem_hex(struct trace_seq *s, const void *mem, unsigned int len); extern int trace_seq_path(struct trace_seq *s, const struct path *path); -extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, +extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, int nmaskbits); #else /* CONFIG_TRACING */ -static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +static inline void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
{ - return 0; } -static inline int +static inline void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { - return 0; } -static inline int +static inline void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, int nmaskbits) { - return 0; } static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s) @@ -102,23 +99,19 @@ static inline int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, { return 0; } -static inline int trace_seq_puts(struct trace_seq *s, const char *str) +static inline void trace_seq_puts(struct trace_seq *s, const char *str) { - return 0; } -static inline int trace_seq_putc(struct trace_seq *s, unsigned char c) +static inline void trace_seq_putc(struct trace_seq *s, unsigned char c) { - return 0; } -static inline int +static inline void trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) { - return 0; } -static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, +static inline void trace_seq_putmem_hex(struct trace_seq *s, const void *mem, unsigned int len) { - return 0; } static inline int trace_seq_path(struct trace_seq *s, const struct path *path) { diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index b100994a17fe..fabfa0f190a3 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -69,20 +69,15 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s) * trace_seq_printf() is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. - * - * Returns 1 if we successfully written all the contents to - * the buffer. - * Returns 0 if we the length to write is bigger than the - * reserved buffer space. In this case, nothing gets written. */ -int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { unsigned int len = TRACE_SEQ_BUF_LEFT(s); va_list ap; int ret; if (s->full || !len) - return 0; + return; va_start(ap, fmt); ret = vsnprintf(s->buffer + s->len, len, fmt, ap); @@ -91,12 +86,10 @@ int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) /* If we can't write it all, don't bother writing anything */ if (ret >= len) { s->full = 1; - return 0; + return; } s->len += ret; - - return 1; } EXPORT_SYMBOL_GPL(trace_seq_printf); @@ -107,25 +100,18 @@ EXPORT_SYMBOL_GPL(trace_seq_printf); * @nmaskbits: The number of bits that are valid in @maskp * * Writes a ASCII representation of a bitmask string into @s. - * - * Returns 1 if we successfully written all the contents to - * the buffer. - * Returns 0 if we the length to write is bigger than the - * reserved buffer space. In this case, nothing gets written. */ -int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, +void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, int nmaskbits) { unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) - return 0; + return; ret = bitmap_scnprintf(s->buffer + s->len, len, maskp, nmaskbits); s->len += ret; - - return 1; } EXPORT_SYMBOL_GPL(trace_seq_bitmask); @@ -139,28 +125,24 @@ EXPORT_SYMBOL_GPL(trace_seq_bitmask); * trace_seq_printf is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. - * - * Returns how much it wrote to the buffer. 
*/ -int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +void trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) { unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) - return 0; + return; ret = vsnprintf(s->buffer + s->len, len, fmt, args); /* If we can't write it all, don't bother writing anything */ if (ret >= len) { s->full = 1; - return 0; + return; } s->len += ret; - - return len; } EXPORT_SYMBOL_GPL(trace_seq_vprintf); @@ -178,28 +160,24 @@ EXPORT_SYMBOL_GPL(trace_seq_vprintf); * * This function will take the format and the binary array and finish * the conversion into the ASCII string within the buffer. - * - * Returns how much it wrote to the buffer. */ -int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) - return 0; + return; ret = bstr_printf(s->buffer + s->len, len, fmt, binary); /* If we can't write it all, don't bother writing anything */ if (ret >= len) { s->full = 1; - return 0; + return; } s->len += ret; - - return len; } EXPORT_SYMBOL_GPL(trace_seq_bprintf); @@ -212,25 +190,21 @@ EXPORT_SYMBOL_GPL(trace_seq_bprintf); * copy to user routines. This function records a simple string * into a special buffer (@s) for later retrieval by a sequencer * or other mechanism. - * - * Returns how much it wrote to the buffer. */ -int trace_seq_puts(struct trace_seq *s, const char *str) +void trace_seq_puts(struct trace_seq *s, const char *str) { unsigned int len = strlen(str); if (s->full) - return 0; + return; if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; - return 0; + return; } memcpy(s->buffer + s->len, str, len); s->len += len; - - return len; } EXPORT_SYMBOL_GPL(trace_seq_puts); @@ -243,22 +217,18 @@ EXPORT_SYMBOL_GPL(trace_seq_puts); * copy to user routines. This function records a simple charater * into a special buffer (@s) for later retrieval by a sequencer * or other mechanism. - * - * Returns how much it wrote to the buffer. */ -int trace_seq_putc(struct trace_seq *s, unsigned char c) +void trace_seq_putc(struct trace_seq *s, unsigned char c) { if (s->full) - return 0; + return; if (TRACE_SEQ_BUF_LEFT(s) < 1) { s->full = 1; - return 0; + return; } s->buffer[s->len++] = c; - - return 1; } EXPORT_SYMBOL_GPL(trace_seq_putc); @@ -271,23 +241,19 @@ EXPORT_SYMBOL_GPL(trace_seq_putc); * There may be cases where raw memory needs to be written into the * buffer and a strcpy() would not work. Using this function allows * for such cases. - * - * Returns how much it wrote to the buffer. */ -int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) +void trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) { if (s->full) - return 0; + return; if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; - return 0; + return; } memcpy(s->buffer + s->len, mem, len); s->len += len; - - return len; } EXPORT_SYMBOL_GPL(trace_seq_putmem); @@ -303,20 +269,17 @@ EXPORT_SYMBOL_GPL(trace_seq_putmem); * This is similar to trace_seq_putmem() except instead of just copying the * raw memory into the buffer it writes its ASCII representation of it * in hex characters. - * - * Returns how much it wrote to the buffer. 
*/ -int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, +void trace_seq_putmem_hex(struct trace_seq *s, const void *mem, unsigned int len) { unsigned char hex[HEX_CHARS]; const unsigned char *data = mem; unsigned int start_len; int i, j; - int cnt = 0; if (s->full) - return 0; + return; while (len) { start_len = min(len, HEX_CHARS - 1); @@ -335,9 +298,8 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, len -= j / 2; hex[j++] = ' '; - cnt += trace_seq_putmem(s, hex, j); + trace_seq_putmem(s, hex, j); } - return cnt; } EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); From 8e2e095cbecad21078332cd83ae7e61ecfe12c59 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 14 Nov 2014 11:42:06 -0500 Subject: [PATCH 35/55] tracing: Fix return value of ftrace_raw_output_prep() If the trace_seq of ftrace_raw_output_prep() is full this function returns TRACE_TYPE_PARTIAL_LINE, otherwise it returns zero. The problem is that TRACE_TYPE_PARTIAL_LINE happens to be zero! The thing is, the caller of ftrace_raw_output_prep() expects a success to be zero. Change that to expect it to be TRACE_TYPE_HANDLED. Link: http://lkml.kernel.org/r/20141114112522.GA2988@dhcp128.suse.cz Reminded-by: Petr Mladek Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 +- kernel/trace/trace_output.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index f13471b5d27a..139b5067345b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -277,7 +277,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \ field = (typeof(field))iter->ent; \ \ ret = ftrace_raw_output_prep(iter, trace_event); \ - if (ret) \ + if (ret != TRACE_TYPE_HANDLED) \ return ret; \ \ trace_seq_printf(s, print); \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 163c11b6b8ff..723818bc83b4 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -196,10 +196,7 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, trace_seq_init(p); trace_seq_printf(s, "%s: ", ftrace_event_name(event)); - if (trace_seq_has_overflowed(s)) - return TRACE_TYPE_PARTIAL_LINE; - - return 0; + return trace_handle_return(s); } EXPORT_SYMBOL(ftrace_raw_output_prep); From 16a8ef2751801346f1f76a18685b2beb63cd170f Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 16 Nov 2014 14:46:28 +0100 Subject: [PATCH 36/55] tracing: Deletion of an unnecessary check before iput() The iput() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/5468F875.7080907@users.sourceforge.net Signed-off-by: Markus Elfring Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index fd76f8e108ef..8520acc34b18 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -552,8 +552,7 @@ error: return ret; fail_address_parse: - if (inode) - iput(inode); + iput(inode); pr_info("Failed to parse address or file.\n"); From 0af26492d5f5c00a08d52e9f3f3831faead90246 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 20 Nov 2014 10:05:36 -0500 Subject: [PATCH 37/55] tracing/trivial: Fix typos and make an int into a bool Fix up a few typos in comments and convert an int into a bool in update_traceon_count(). 
Link: http://lkml.kernel.org/r/546DD445.5080108@hitachi.com Suggested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace_functions.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fa0f36bb32e9..588af40d33db 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1119,7 +1119,7 @@ static struct ftrace_ops global_ops = { /* * This is used by __kernel_text_address() to return true if the - * the address is on a dynamically allocated trampoline that would + * address is on a dynamically allocated trampoline that would * not return true for either core_kernel_text() or * is_module_text_address(). */ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 973db52eb070..fcd41a166405 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -261,14 +261,14 @@ static struct tracer function_trace __tracer_data = }; #ifdef CONFIG_DYNAMIC_FTRACE -static void update_traceon_count(void **data, int on) +static void update_traceon_count(void **data, bool on) { long *count = (long *)data; long old_count = *count; /* * Tracing gets disabled (or enabled) once per count. - * This function can be called at the same time on mulitple CPUs. + * This function can be called at the same time on multiple CPUs. * It is fine if both disable (or enable) tracing, as disabling * (or enabling) the second time doesn't do anything as the * state of the tracer is already disabled (or enabled). @@ -288,7 +288,7 @@ static void update_traceon_count(void **data, int on) * the new state is visible before changing the counter by * one minus the old counter. This guarantees that another CPU * executing this code will see the new state before seeing - * the new counter value, and would not do anthing if the new + * the new counter value, and would not do anything if the new * counter is seen. * * Note, there is no synchronization between this and a user From a017784f1b236cbc42ce83b4345a667c21113481 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 Oct 2014 13:01:06 +0000 Subject: [PATCH 38/55] kprobes/ftrace: Recover original IP if pre_handler doesn't change it Recover the original IP register if the pre_handler doesn't change it. Since the current kprobes code doesn't expect that another ftrace handler may change regs->ip, it sets kprobe.addr + MCOUNT_INSN_SIZE to regs->ip and returns to ftrace. This is the wrong behavior, since kprobes can recover regs->ip and safely pass it to another handler. This adds code which recovers the original regs->ip passed from ftrace right before returning to ftrace, so that another ftrace user can change regs->ip.
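Reduced to a sketch (simplified from the actual kprobe_ftrace_handler() change below), the flow is:

    unsigned long orig_ip = regs->ip;

    /* Kprobe handlers expect regs->ip to look like a breakpoint hit */
    regs->ip = ip + sizeof(kprobe_opcode_t);

    if (!p->pre_handler || !p->pre_handler(p, regs))
            /* pre_handler left the IP alone: restore what ftrace gave us */
            regs->ip = orig_ip;     /* done in __skip_singlestep() */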
Link: http://lkml.kernel.org/r/20141009130106.4698.26362.stgit@kbuild-f20.novalocal Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/kprobes/ftrace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 717b02a22e67..5f8f0b3cc674 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -27,7 +27,7 @@ static nokprobe_inline int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) + struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* * Emulate singlestep (and also recover regs->ip) @@ -39,6 +39,8 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, p->post_handler(p, regs, 0); } __this_cpu_write(current_kprobe, NULL); + if (orig_ip) + regs->ip = orig_ip; return 1; } @@ -46,7 +48,7 @@ int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb); + return __skip_singlestep(p, regs, kcb, 0); else return 0; } @@ -71,13 +73,14 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (kprobe_running()) { kprobes_inc_nmissed_count(p); } else { + unsigned long orig_ip = regs->ip; /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (!p->pre_handler || !p->pre_handler(p, regs)) - __skip_singlestep(p, regs, kcb); + __skip_singlestep(p, regs, kcb, orig_ip); /* * If pre_handler returns !0, it sets regs->ip and * resets current kprobe. From f8b8be8a310a55856fd2c369dade08088d85df3b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Nov 2014 05:25:16 -0500 Subject: [PATCH 39/55] ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict Introduce FTRACE_OPS_FL_IPMODIFY to avoid conflicts among ftrace users who may modify regs->ip to change the execution path. If two or more users modify the regs->ip on the same function entry, one of them will be broken. So they must set the IPMODIFY flag and make sure that ftrace_set_filter_ip() succeeds. Note that ftrace doesn't allow an ftrace_ops with the IPMODIFY flag to have a notrace hash, and the ftrace_ops must have a filter hash (so that it can hook only specific entries), because IP modification strongly depends on the address and must be allowed for only a few selected functions. Link: http://lkml.kernel.org/r/20141121102516.11844.27829.stgit@localhost.localdomain Cc: Jiri Kosina Cc: Seth Jennings Cc: Petr Mladek Cc: Vojtech Pavlik Cc: Miroslav Benes Cc: Ingo Molnar Cc: Ananth N Mavinakayanahalli Cc: Josh Poimboeuf Cc: Namhyung Kim Signed-off-by: Masami Hiramatsu [ fixed up some of the comments ] Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 5 ++ include/linux/ftrace.h | 16 +++- kernel/trace/ftrace.c | 142 ++++++++++++++++++++++++++++++++- 3 files changed, 159 insertions(+), 4 deletions(-) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 4da42616939f..f10f5f5d260d 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -234,6 +234,11 @@ of ftrace. Here is a list of some of the key files: will be displayed on the same line as the function that is returning registers.
+ If the callback registered to be traced by a function with + the "ip modify" attribute (thus the regs->ip can be changed), + an 'I' will be displayed on the same line as the function that + can be overridden. + function_profile_enabled: When set it will enable all functions with either the function diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7b2616fa2472..ed501953f0b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -61,6 +61,11 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are * set in the flags member. + * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and + * IPMODIFY are a kind of attribute flags which can be set only before + * registering the ftrace_ops, and can not be modified while registered. + * Changing those attribute flags after regsitering ftrace_ops will + * cause unexpected results. * * ENABLED - set/unset when ftrace_ops is registered/unregistered * DYNAMIC - set when ftrace_ops is registered to denote dynamically @@ -101,6 +106,10 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * The ftrace_ops trampoline can be set by the ftrace users, and * in such cases the arch must not modify it. Only the arch ftrace * core code should set this flag. + * IPMODIFY - The ops can modify the IP register. This can only be set with + * SAVE_REGS. If another ops with this flag set is already registered + * for any of the functions that this ops will be registered for, then + * this ops will fail to register or set_filter_ip. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -116,6 +125,7 @@ enum { FTRACE_OPS_FL_REMOVING = 1 << 10, FTRACE_OPS_FL_MODIFYING = 1 << 11, FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, + FTRACE_OPS_FL_IPMODIFY = 1 << 13, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -310,6 +320,7 @@ bool is_ftrace_trampoline(unsigned long addr); * ENABLED - the function is being traced * REGS - the record wants the function to save regs * REGS_EN - the function is set up to save regs. + * IPMODIFY - the record allows for the IP address to be changed. * * When a new ftrace_ops is registered and wants a function to save * pt_regs, the rec->flag REGS is set. 
When the function has been @@ -323,10 +334,11 @@ enum { FTRACE_FL_REGS_EN = (1UL << 29), FTRACE_FL_TRAMP = (1UL << 28), FTRACE_FL_TRAMP_EN = (1UL << 27), + FTRACE_FL_IPMODIFY = (1UL << 26), }; -#define FTRACE_REF_MAX_SHIFT 27 -#define FTRACE_FL_BITS 5 +#define FTRACE_REF_MAX_SHIFT 26 +#define FTRACE_FL_BITS 6 #define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) #define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 588af40d33db..929a733d302e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1358,6 +1358,9 @@ ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, int filter_hash); static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash); +static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, + struct ftrace_hash *new_hash); + static int ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash **dst, struct ftrace_hash *src) @@ -1368,8 +1371,13 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash *new_hash; int size = src->count; int bits = 0; + int ret; int i; + /* Reject setting notrace hash on IPMODIFY ftrace_ops */ + if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable) + return -EINVAL; + /* * If the new source is empty, just free dst and assign it * the empty_hash. @@ -1403,6 +1411,16 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, } update: + /* Make sure this can be applied if it is IPMODIFY ftrace_ops */ + if (enable) { + /* IPMODIFY should be updated only when filter_hash updating */ + ret = ftrace_hash_ipmodify_update(ops, new_hash); + if (ret < 0) { + free_ftrace_hash(new_hash); + return ret; + } + } + /* * Remove the current set, update the hash and add * them back. @@ -1767,6 +1785,114 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ftrace_hash_rec_update_modify(ops, filter_hash, 1); } +/* + * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK + * or no-needed to update, -EBUSY if it detects a conflict of the flag + * on a ftrace_rec, and -EINVAL if the new_hash tries to trace all recs. + * Note that old_hash and new_hash has below meanings + * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) + * - If the hash is EMPTY_HASH, it hits nothing + * - Anything else hits the recs which match the hash entries. + */ +static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, + struct ftrace_hash *old_hash, + struct ftrace_hash *new_hash) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec, *end = NULL; + int in_old, in_new; + + /* Only update if the ops has been registered */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return 0; + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return 0; + + /* + * Since the IPMODIFY is a very address sensitive action, we do not + * allow ftrace_ops to set all functions to new hash. 
+ */ + if (!new_hash || !old_hash) + return -EINVAL; + + /* Update rec->flags */ + do_for_each_ftrace_rec(pg, rec) { + /* We need to update only differences of filter_hash */ + in_old = !!ftrace_lookup_ip(old_hash, rec->ip); + in_new = !!ftrace_lookup_ip(new_hash, rec->ip); + if (in_old == in_new) + continue; + + if (in_new) { + /* New entries must ensure no others are using it */ + if (rec->flags & FTRACE_FL_IPMODIFY) + goto rollback; + rec->flags |= FTRACE_FL_IPMODIFY; + } else /* Removed entry */ + rec->flags &= ~FTRACE_FL_IPMODIFY; + } while_for_each_ftrace_rec(); + + return 0; + +rollback: + end = rec; + + /* Roll back what we did above */ + do_for_each_ftrace_rec(pg, rec) { + if (rec == end) + goto err_out; + + in_old = !!ftrace_lookup_ip(old_hash, rec->ip); + in_new = !!ftrace_lookup_ip(new_hash, rec->ip); + if (in_old == in_new) + continue; + + if (in_new) + rec->flags &= ~FTRACE_FL_IPMODIFY; + else + rec->flags |= FTRACE_FL_IPMODIFY; + } while_for_each_ftrace_rec(); + +err_out: + return -EBUSY; +} + +static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops) +{ + struct ftrace_hash *hash = ops->func_hash->filter_hash; + + if (ftrace_hash_empty(hash)) + hash = NULL; + + return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash); +} + +/* Disabling always succeeds */ +static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops) +{ + struct ftrace_hash *hash = ops->func_hash->filter_hash; + + if (ftrace_hash_empty(hash)) + hash = NULL; + + __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH); +} + +static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, + struct ftrace_hash *new_hash) +{ + struct ftrace_hash *old_hash = ops->func_hash->filter_hash; + + if (ftrace_hash_empty(old_hash)) + old_hash = NULL; + + if (ftrace_hash_empty(new_hash)) + new_hash = NULL; + + return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); +} + static void print_ip_ins(const char *fmt, unsigned char *p) { int i; @@ -2436,6 +2562,15 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) */ ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; + ret = ftrace_hash_ipmodify_enable(ops); + if (ret < 0) { + /* Rollback registration process */ + __unregister_ftrace_function(ops); + ftrace_start_up--; + ops->flags &= ~FTRACE_OPS_FL_ENABLED; + return ret; + } + ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); @@ -2464,6 +2599,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) */ WARN_ON_ONCE(ftrace_start_up < 0); + /* Disabling ipmodify never fails */ + ftrace_hash_ipmodify_disable(ops); ftrace_hash_rec_disable(ops, 1); ops->flags &= ~FTRACE_OPS_FL_ENABLED; @@ -3058,9 +3195,10 @@ static int t_show(struct seq_file *m, void *v) if (iter->flags & FTRACE_ITER_ENABLED) { struct ftrace_ops *ops = NULL; - seq_printf(m, " (%ld)%s", + seq_printf(m, " (%ld)%s%s", ftrace_rec_count(rec), - rec->flags & FTRACE_FL_REGS ? " R" : " "); + rec->flags & FTRACE_FL_REGS ? " R" : " ", + rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) From 1d70be34df59f34b4562b8c047fc3a5069ce17b4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Nov 2014 05:25:23 -0500 Subject: [PATCH 40/55] kprobes: Add IPMODIFY flag to kprobe_ftrace_ops Add the FTRACE_OPS_FL_IPMODIFY flag to kprobe_ftrace_ops since kprobes can change regs->ip.
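As an illustration of the resulting API (a hypothetical user; my_ops, my_hook, my_replacement and target_func are made-up names), an IPMODIFY consumer follows the same pattern kprobes now does:

    static void my_hook(unsigned long ip, unsigned long parent_ip,
                        struct ftrace_ops *op, struct pt_regs *regs)
    {
            /* Redirect execution; only legal because IPMODIFY is set */
            regs->ip = (unsigned long)my_replacement;
    }

    static struct ftrace_ops my_ops = {
            .func   = my_hook,
            .flags  = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
    };

    static int install_my_hook(void)
    {
            int ret;

            /* IPMODIFY requires an explicit filter hash */
            ret = ftrace_set_filter_ip(&my_ops, (unsigned long)target_func, 0, 0);
            if (ret)
                    return ret;

            /* Fails with -EBUSY if another IPMODIFY user claims target_func */
            return register_ftrace_function(&my_ops);
    }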
Link: http://lkml.kernel.org/r/20141121102523.11844.21298.stgit@localhost.localdomain Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3995f546d0f3..831978cebf1d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -915,7 +915,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) #ifdef CONFIG_KPROBES_ON_FTRACE static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { .func = kprobe_ftrace_handler, - .flags = FTRACE_OPS_FL_SAVE_REGS, + .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY, }; static int kprobe_ftrace_enabled; From 62a207d748dd9224140a634786b274fdb6ece0b9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 14:58:17 -0500 Subject: [PATCH 41/55] ftrace/x86: Have static function tracing always test for function graph New updates to the ftrace generic code had ftrace_stub not always being called when ftrace is off. This causes the static tracer to always save and restore functions. But it also showed that when function tracing is running, the function graph tracer can not run. We should always check to see if function graph tracing is running even if the function tracer is running too. The function tracer code is not the only user of the hook to the mcount function. Cc: Markos Chandras Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 35a793fa4bba..6dc134b8dc70 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -194,6 +194,7 @@ ENTRY(function_hook) cmpq $ftrace_stub, ftrace_trace_function jnz trace +fgraph_trace: #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpq $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller @@ -220,7 +221,7 @@ trace: MCOUNT_RESTORE_FRAME - jmp ftrace_stub + jmp fgraph_trace END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ From 76c2f13c5515979adab5de3ebda27c309f459a7b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 14:54:27 -0500 Subject: [PATCH 42/55] ftrace/x86: Have static tracing also use ftrace_caller_setup Linus pointed out that there were locations that did the hard-coded update of the parent and rip parameters. One of them was the static tracer which could also use the ftrace_caller_setup to do that work. In fact, because it did not use it, it is prone to bugs, and since the static tracer is hardly ever used (who wants function tracing code always being called?) it doesn't get tested very often. I only run a few "does it still work" tests on it. But I do not run stress tests on that code. Although, since it is never turned off, just having it on should be stressful enough. (especially for the performance folks) There's no reason that the static tracer can't also use ftrace_caller_setup. Have it do so.
Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 6dc134b8dc70..24842c701660 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,12 +21,6 @@ # define function_hook mcount #endif -#ifdef CONFIG_DYNAMIC_FTRACE - -ENTRY(function_hook) - retq -END(function_hook) - /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup trace_label skip=0 MCOUNT_SAVE_FRAME \skip @@ -47,6 +41,12 @@ GLOBAL(\trace_label) #endif .endm +#ifdef CONFIG_DYNAMIC_FTRACE + +ENTRY(function_hook) + retq +END(function_hook) + #ifdef CONFIG_FRAME_POINTER /* * Stack traces will stop at the ftrace trampoline if the frame pointer @@ -207,15 +207,7 @@ GLOBAL(ftrace_stub) retq trace: - MCOUNT_SAVE_FRAME - - movq RIP(%rsp), %rdi -#ifdef CC_USING_FENTRY - movq SS+16(%rsp), %rsi -#else - movq 8(%rbp), %rsi -#endif - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup ftrace_caller_op_ptr call *ftrace_trace_function From 4bcdf1522fb11b6c3a3dabe4432b54da2bd6bc0e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 11:30:58 -0500 Subject: [PATCH 43/55] ftrace/x86: Move MCOUNT_SAVE_FRAME out of header file Linus pointed out that MCOUNT_SAVE_FRAME is used in only a single file and that there's no reason that it should be in a header file. Move the macro to the code that uses it. Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 33 --------------------------------- arch/x86/kernel/mcount_64.S | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index e1f7fecaa7d6..f45acad3c4b6 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -1,39 +1,6 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H -#ifdef __ASSEMBLY__ - - /* skip is set if the stack was already partially adjusted */ - .macro MCOUNT_SAVE_FRAME skip=0 - /* - * We add enough stack to save all regs. - */ - subq $(SS+8-\skip), %rsp - movq %rax, RAX(%rsp) - movq %rcx, RCX(%rsp) - movq %rdx, RDX(%rsp) - movq %rsi, RSI(%rsp) - movq %rdi, RDI(%rsp) - movq %r8, R8(%rsp) - movq %r9, R9(%rsp) - /* Move RIP to its proper location */ - movq SS+8(%rsp), %rdx - movq %rdx, RIP(%rsp) - .endm - - .macro MCOUNT_RESTORE_FRAME skip=0 - movq R9(%rsp), %r9 - movq R8(%rsp), %r8 - movq RDI(%rsp), %rdi - movq RSI(%rsp), %rsi - movq RDX(%rsp), %rdx - movq RCX(%rsp), %rcx - movq RAX(%rsp), %rax - addq $(SS+8-\skip), %rsp - .endm - -#endif - #ifdef CONFIG_FUNCTION_TRACER #ifdef CC_USING_FENTRY # define MCOUNT_ADDR ((long)(__fentry__)) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 24842c701660..94fe46725fe0 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,6 +21,35 @@ # define function_hook mcount #endif +/* skip is set if the stack was already partially adjusted */ +.macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. 
+ */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) + .endm + +.macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp + .endm + /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup trace_label skip=0 MCOUNT_SAVE_FRAME \skip From 05df710ec343cf351475e86bfaf89938e5114d69 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 11:43:39 -0500 Subject: [PATCH 44/55] ftrace/x86: Rename MCOUNT_SAVE_FRAME and add more detailed comments The name MCOUNT_SAVE_FRAME is rather confusing as it really isn't a function frame that is saved, but just the required mcount registers that are needed to be saved before C code may be called. The word "frame" confuses it as being a function frame which it is not. Rename MCOUNT_SAVE_FRAME and MCOUNT_RESTORE_FRAME to save_mcount_regs and restore_mcount_regs respectively. Noticed the lower case, which keeps it from screaming at the reviewers. Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 94fe46725fe0..0a693d011980 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,8 +21,24 @@ # define function_hook mcount #endif +/* + * gcc -pg option adds a call to 'mcount' in most functions. + * When -mfentry is used, the call is to 'fentry' and not 'mcount' + * and is done before the function's stack frame is set up. + * They both require a set of regs to be saved before calling + * any C code and restored before returning back to the function. + * + * On boot up, all these calls are converted into nops. When tracing + * is enabled, the call can jump to either ftrace_caller or + * ftrace_regs_caller. Callbacks (tracing functions) that require + * ftrace_regs_caller (like kprobes) need to have pt_regs passed to + * it. For this reason, the size of the pt_regs structure will be + * allocated on the stack and the required mcount registers will + * be saved in the locations that pt_regs has them in. + */ + /* skip is set if the stack was already partially adjusted */ -.macro MCOUNT_SAVE_FRAME skip=0 +.macro save_mcount_regs skip=0 /* * We add enough stack to save all regs. 
*/ @@ -39,7 +55,7 @@ movq %rdx, RIP(%rsp) .endm -.macro MCOUNT_RESTORE_FRAME skip=0 +.macro restore_mcount_regs skip=0 movq R9(%rsp), %r9 movq R8(%rsp), %r8 movq RDI(%rsp), %rdi @@ -52,7 +68,7 @@ /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup trace_label skip=0 - MCOUNT_SAVE_FRAME \skip + save_mcount_regs \skip /* Save this location */ GLOBAL(\trace_label) @@ -121,7 +137,7 @@ GLOBAL(ftrace_call) restore_frame - MCOUNT_RESTORE_FRAME + restore_mcount_regs /* * The copied trampoline must call ftrace_return as it @@ -196,7 +212,7 @@ GLOBAL(ftrace_regs_call) movq RBX(%rsp), %rbx /* skip=8 to skip flags saved in SS */ - MCOUNT_RESTORE_FRAME 8 + restore_mcount_regs 8 /* Restore flags */ popfq @@ -240,7 +256,7 @@ trace: call *ftrace_trace_function - MCOUNT_RESTORE_FRAME + restore_mcount_regs jmp fgraph_trace END(function_hook) @@ -249,7 +265,7 @@ END(function_hook) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - MCOUNT_SAVE_FRAME + save_mcount_regs #ifdef CC_USING_FENTRY leaq SS+16(%rsp), %rdi @@ -263,7 +279,7 @@ ENTRY(ftrace_graph_caller) call prepare_ftrace_return - MCOUNT_RESTORE_FRAME + restore_mcount_regs retq END(ftrace_graph_caller) From 094dfc545139510f251b9595850aa63fe2a8c131 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 13:21:09 -0500 Subject: [PATCH 45/55] ftrace/x86: Have save_mcount_regs store RIP in %rdi for first parameter Instead of having save_mcount_regs store the RIP in %rdx as a temp register to place it in the proper location of the pt_regs on the stack, use the %rdi register as the temp register. This lets us remove the extra store in the ftrace_caller_setup macro. Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 0a693d011980..4f1b27642495 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -51,8 +51,8 @@ movq %r8, R8(%rsp) movq %r9, R9(%rsp) /* Move RIP to its proper location */ - movq SS+8(%rsp), %rdx - movq %rdx, RIP(%rsp) + movq SS+8(%rsp), %rdi + movq %rdi, RIP(%rsp) .endm @@ -75,8 +75,7 @@ GLOBAL(\trace_label) /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx - /* Load ip into the first parameter */ - movq RIP(%rsp), %rdi + /* %rdi already has %rip from the save_mcount_regs macro */ subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ #ifdef CC_USING_FENTRY movq SS+16(%rsp), %rsi @@ ... wait.

From 527aa75b333f90f4f90ac1730762156680a42fe8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 13:06:05 -0500 Subject: [PATCH 46/55] ftrace/x86: Simplify save_mcount_regs on getting RIP Currently save_mcount_regs is passed a "skip" parameter to know how much stack updated the pt_regs, as it tries to keep the saved pt_regs in the same location for all users. This is rather stupid, especially since the part stored on the pt_regs has nothing to do with what is supposed to be in that location. Instead of doing that, just pass in an "added" parameter that lets that macro know how much stack was added before it was called so that it can get to the RIP.
But the difference is that it will now offset the pt_regs by that "added" count. The caller now needs to take care of the offset of the pt_regs. This will make it easier to simplify the code later. Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 4f1b27642495..596ac330c1db 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -37,12 +37,12 @@ * be saved in the locations that pt_regs has them in. */ -/* skip is set if the stack was already partially adjusted */ -.macro save_mcount_regs skip=0 +/* @added: the amount of stack added before calling this */ +.macro save_mcount_regs added=0 /* * We add enough stack to save all regs. */ - subq $(SS+8-\skip), %rsp + subq $(SS+8), %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) @@ -51,11 +51,11 @@ movq %r8, R8(%rsp) movq %r9, R9(%rsp) /* Move RIP to its proper location */ - movq SS+8(%rsp), %rdi + movq SS+8+\added(%rsp), %rdi movq %rdi, RIP(%rsp) .endm -.macro restore_mcount_regs skip=0 +.macro restore_mcount_regs movq R9(%rsp), %r9 movq R8(%rsp), %r8 movq RDI(%rsp), %rdi @@ -63,12 +63,12 @@ movq RDX(%rsp), %rdx movq RCX(%rsp), %rcx movq RAX(%rsp), %rax - addq $(SS+8-\skip), %rsp + addq $(SS+8), %rsp .endm /* skip is set if stack has been adjusted */ -.macro ftrace_caller_setup trace_label skip=0 - save_mcount_regs \skip +.macro ftrace_caller_setup trace_label added=0 + save_mcount_regs \added /* Save this location */ GLOBAL(\trace_label) @@ -79,9 +79,9 @@ GLOBAL(\trace_label) subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ #ifdef CC_USING_FENTRY - movq SS+16(%rsp), %rsi + movq SS+16+\added(%rsp), %rsi #else - movq 8(%rbp), %rsi + movq 8+\added(%rbp), %rsi #endif .endm @@ -156,10 +156,10 @@ GLOBAL(ftrace_stub) END(ftrace_caller) ENTRY(ftrace_regs_caller) - /* Save the current flags before compare (in SS location)*/ + /* Save the current flags before any operations that can change them */ pushfq - /* skip=8 to skip flags saved in SS */ + /* added 8 bytes to save flags */ ftrace_caller_setup ftrace_regs_caller_op_ptr 8 /* Save the rest of pt_regs */ @@ -172,15 +172,15 @@ ENTRY(ftrace_regs_caller) movq %rbp, RBP(%rsp) movq %rbx, RBX(%rsp) /* Copy saved flags */ - movq SS(%rsp), %rcx + movq SS+8(%rsp), %rcx movq %rcx, EFLAGS(%rsp) /* Kernel segments */ movq $__KERNEL_DS, %rcx movq %rcx, SS(%rsp) movq $__KERNEL_CS, %rcx movq %rcx, CS(%rsp) - /* Stack - skipping return address */ - leaq SS+16(%rsp), %rcx + /* Stack - skipping return address and flags */ + leaq SS+8*3(%rsp), %rcx movq %rcx, RSP(%rsp) /* regs go into 4th parameter */ @@ -195,11 +195,11 @@ GLOBAL(ftrace_regs_call) /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax - movq %rax, SS(%rsp) + movq %rax, SS+8(%rsp) /* Handlers can change the RIP */ movq RIP(%rsp), %rax - movq %rax, SS+8(%rsp) + movq %rax, SS+8*2(%rsp) /* restore the rest of pt_regs */ movq R15(%rsp), %r15 @@ -210,8 +210,7 @@ GLOBAL(ftrace_regs_call) movq RBP(%rsp), %rbp movq RBX(%rsp), %rbx - /* skip=8 to skip flags saved in SS */ - restore_mcount_regs 8 + restore_mcount_regs /* Restore flags */ popfq From 85f6f0290c4d4667a5afb06e66815bcf5ce2c4f7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 14:26:38 -0500 Subject: [PATCH 
47/55] ftrace/x86: Add macro MCOUNT_REG_SIZE for amount of stack used to save mcount regs The macro save_mcount_regs saves regs onto the stack. But to uncouple the amount of stack used in that macro from the users of the macro, we need to have a define that tells all the users how much stack is used by that macro. This way we can change the amount of stack the macro uses without breaking its users. Also remove some dead code that was left over from commit fdc841b58cf5 "ftrace: x86: Remove check of obsolete variable function_trace_stop". Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 596ac330c1db..a0f6f942183a 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,6 +21,9 @@ # define function_hook mcount #endif +/* Size of stack used to save mcount regs in save_mcount_regs */ +#define MCOUNT_REG_SIZE (SS+8) + /* * gcc -pg option adds a call to 'mcount' in most functions. * When -mfentry is used, the call is to 'fentry' and not 'mcount' @@ -42,7 +45,7 @@ /* * We add enough stack to save all regs. */ - subq $(SS+8), %rsp + subq $MCOUNT_REG_SIZE, %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) @@ -51,7 +54,7 @@ movq %r8, R8(%rsp) movq %r9, R9(%rsp) /* Move RIP to its proper location */ - movq SS+8+\added(%rsp), %rdi + movq MCOUNT_REG_SIZE+\added(%rsp), %rdi movq %rdi, RIP(%rsp) .endm @@ -63,7 +66,7 @@ movq RDX(%rsp), %rdx movq RCX(%rsp), %rcx movq RAX(%rsp), %rax - addq $(SS+8), %rsp + addq $MCOUNT_REG_SIZE, %rsp .endm /* skip is set if stack has been adjusted */ @@ -79,7 +82,7 @@ GLOBAL(\trace_label) subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ #ifdef CC_USING_FENTRY - movq SS+16+\added(%rsp), %rsi + movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi #else movq 8+\added(%rbp), %rsi #endif @@ -172,7 +175,7 @@ ENTRY(ftrace_regs_caller) movq %rbp, RBP(%rsp) movq %rbx, RBX(%rsp) /* Copy saved flags */ - movq SS+8(%rsp), %rcx + movq MCOUNT_REG_SIZE(%rsp), %rcx movq %rcx, EFLAGS(%rsp) /* Kernel segments */ movq $__KERNEL_DS, %rcx @@ -180,7 +183,7 @@ ENTRY(ftrace_regs_caller) movq $__KERNEL_CS, %rcx movq %rcx, CS(%rsp) /* Stack - skipping return address and flags */ - leaq SS+8*3(%rsp), %rcx + leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx movq %rcx, RSP(%rsp) /* regs go into 4th parameter */ @@ -195,11 +198,11 @@ GLOBAL(ftrace_regs_call) /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax - movq %rax, SS+8(%rsp) + movq %rax, MCOUNT_REG_SIZE(%rsp) /* Handlers can change the RIP */ movq RIP(%rsp), %rax - movq %rax, SS+8*2(%rsp) + movq %rax, MCOUNT_REG_SIZE+8(%rsp) /* restore the rest of pt_regs */ movq R15(%rsp), %r15 @@ -225,9 +228,6 @@ GLOBAL(ftrace_regs_caller_end) jmp ftrace_return - popfq - jmp ftrace_stub - END(ftrace_regs_caller) @@ -266,7 +266,7 @@ ENTRY(ftrace_graph_caller) save_mcount_regs #ifdef CC_USING_FENTRY - leaq SS+16(%rsp), %rdi + leaq MCOUNT_REG_SIZE+8(%rsp), %rdi movq $0, %rdx /* No framepointers needed */ #else leaq 8(%rbp), %rdi From 0687c36e456ca81feff5f3415e53c1cb8d8efd56 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 18:08:48 -0500 Subject: [PATCH 48/55] ftrace/x86: Have save_mcount_regs macro also save stack frames if needed The save_mcount_regs macro saves and restores the required mcount regs 
that need to be saved before calling C code. It is done for all the function hook utilities (static tracing, dynamic tracing, regs, function graph). When frame pointers are enabled, the ftrace trampolines need to set up frames and pointers such that a back trace (dump stack) can continue past them. Currently, a separate macro is used (create_frame) to do this, but it's only done for the ftrace_caller and ftrace_regs_caller functions. It is not done for the static tracer or function graph tracing. Instead of having a separate macro doing the recording of the frames, have save_mcount_regs perform this task. This also has all tracers saving the frame pointers when needed. Link: http://lkml.kernel.org/r/CA+55aFwF+qCGSKdGaEgW4p6N65GZ5_XTV=1NbtWDvxnd5yYLiw@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 118 ++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 51 deletions(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index a0f6f942183a..003b22df1d87 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -21,8 +21,22 @@ # define function_hook mcount #endif +/* All cases save the original rbp (8 bytes) */ +#ifdef CONFIG_FRAME_POINTER +# ifdef CC_USING_FENTRY +/* Save parent and function stack frames (rip and rbp) */ +# define MCOUNT_FRAME_SIZE (8+16*2) +# else +/* Save just function stack frame (rip and rbp) */ +# define MCOUNT_FRAME_SIZE (8+16) +# endif +#else +/* No need to save a stack frame */ +# define MCOUNT_FRAME_SIZE 8 +#endif /* CONFIG_FRAME_POINTER */ + /* Size of stack used to save mcount regs in save_mcount_regs */ -#define MCOUNT_REG_SIZE (SS+8) +#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE) /* * gcc -pg option adds a call to 'mcount' in most functions. @@ -42,10 +56,37 @@ /* @added: the amount of stack added before calling this */ .macro save_mcount_regs added=0 - /* - * We add enough stack to save all regs. - */ - subq $MCOUNT_REG_SIZE, %rsp + + /* Always save the original rbp */ + pushq %rbp + +#ifdef CONFIG_FRAME_POINTER + /* + * Stack traces will stop at the ftrace trampoline if the frame pointer + * is not set up properly. If fentry is used, we need to save a frame + * pointer for the parent as well as the function traced, because the + * fentry is called before the stack frame is set up, where as mcount + * is called afterward. + */ +#ifdef CC_USING_FENTRY + /* Save the parent pointer (skip orig rbp and our return address) */ + pushq \added+8*2(%rsp) + pushq %rbp + movq %rsp, %rbp + /* Save the return address (now skip orig rbp, rbp and parent) */ + pushq \added+8*3(%rsp) +#else + /* Can't assume that rip is before this (unless added was zero) */ + pushq \added+8(%rsp) +#endif + pushq %rbp + movq %rsp, %rbp +#endif /* CONFIG_FRAME_POINTER */ + + /* + * We add enough stack to save all regs. + */ + subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) @@ -53,6 +94,13 @@ movq %rdi, RDI(%rsp) movq %r8, R8(%rsp) movq %r9, R9(%rsp) + /* + * Save the original RBP. Even though the mcount ABI does not + * require this, it helps out callers.
+ */ + movq MCOUNT_REG_SIZE-8(%rsp), %rdx + movq %rdx, RBP(%rsp) + /* Move RIP to its proper location */ movq MCOUNT_REG_SIZE+\added(%rsp), %rdi movq %rdi, RIP(%rsp) @@ -66,7 +114,12 @@ movq RDX(%rsp), %rdx movq RCX(%rsp), %rcx movq RAX(%rsp), %rax + + /* ftrace_regs_caller can modify %rbp */ + movq RBP(%rsp), %rbp + addq $MCOUNT_REG_SIZE, %rsp + .endm /* skip is set if stack has been adjusted */ @@ -84,7 +137,10 @@ GLOBAL(\trace_label) #ifdef CC_USING_FENTRY movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi #else - movq 8+\added(%rbp), %rsi + /* Need to grab the original %rbp */ + movq RBP(%rsp), %rsi + /* Now parent address is 8 above original %rbp */ + movq 8(%rsi), %rsi #endif .endm @@ -94,51 +150,14 @@ ENTRY(function_hook) retq END(function_hook) -#ifdef CONFIG_FRAME_POINTER -/* - * Stack traces will stop at the ftrace trampoline if the frame pointer - * is not set up properly. If fentry is used, we need to save a frame - * pointer for the parent as well as the function traced, because the - * fentry is called before the stack frame is set up, where as mcount - * is called afterward. - */ -.macro create_frame parent rip -#ifdef CC_USING_FENTRY - pushq \parent - pushq %rbp - movq %rsp, %rbp -#endif - pushq \rip - pushq %rbp - movq %rsp, %rbp -.endm - -.macro restore_frame -#ifdef CC_USING_FENTRY - addq $16, %rsp -#endif - popq %rbp - addq $8, %rsp -.endm -#else -.macro create_frame parent rip -.endm -.macro restore_frame -.endm -#endif /* CONFIG_FRAME_POINTER */ - ENTRY(ftrace_caller) ftrace_caller_setup ftrace_caller_op_ptr /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx - create_frame %rsi, %rdi - GLOBAL(ftrace_call) call ftrace_stub - restore_frame - restore_mcount_regs /* @@ -172,7 +191,6 @@ ENTRY(ftrace_regs_caller) movq %r12, R12(%rsp) movq %r11, R11(%rsp) movq %r10, R10(%rsp) - movq %rbp, RBP(%rsp) movq %rbx, RBX(%rsp) /* Copy saved flags */ movq MCOUNT_REG_SIZE(%rsp), %rcx @@ -189,13 +207,9 @@ ENTRY(ftrace_regs_caller) /* regs go into 4th parameter */ leaq (%rsp), %rcx - create_frame %rsi, %rdi - GLOBAL(ftrace_regs_call) call ftrace_stub - restore_frame - /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax movq %rax, MCOUNT_REG_SIZE(%rsp) @@ -210,7 +224,6 @@ GLOBAL(ftrace_regs_call) movq R13(%rsp), %r13 movq R12(%rsp), %r12 movq R10(%rsp), %r10 - movq RBP(%rsp), %rbp movq RBX(%rsp), %rbx restore_mcount_regs @@ -269,8 +282,11 @@ ENTRY(ftrace_graph_caller) leaq MCOUNT_REG_SIZE+8(%rsp), %rdi movq $0, %rdx /* No framepointers needed */ #else - leaq 8(%rbp), %rdi - movq (%rbp), %rdx + /* Need to grab the original %rbp */ + movq RBP(%rsp), %rdx + /* Now parent address is 8 above original %rbp */ + leaq 8(%rdx), %rdi + movq (%rdx), %rdx #endif movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi From f1ab00af816ec8e1ad53229963c863fdd6bcf222 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 21:38:40 -0500 Subject: [PATCH 49/55] ftrace/x86: Get rid of ftrace_caller_setup Move all the work from ftrace_caller_setup into save_mcount_regs. This simplifies the code and makes it easier to understand. 
Link: http://lkml.kernel.org/r/CA+55aFxUTUbdxpjVMW8X9c=o8sui7OB_MYPfcbJuDyfUWtNrNg@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 71 ++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 003b22df1d87..ddc766efa1f1 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -54,7 +54,15 @@ * be saved in the locations that pt_regs has them in. */ -/* @added: the amount of stack added before calling this */ +/* + * @added: the amount of stack added before calling this + * + * After this is called, the following registers contain: + * + * %rdi - holds the address that called the trampoline + * %rsi - holds the parent function (traced function's return address) + * %rdx - holds the original %rbp + */ .macro save_mcount_regs added=0 /* Always save the original rbp */ @@ -101,9 +109,24 @@ movq MCOUNT_REG_SIZE-8(%rsp), %rdx movq %rdx, RBP(%rsp) + /* Copy the parent address into %rsi (second parameter) */ +#ifdef CC_USING_FENTRY + movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi +#else + /* %rdx contains original %rbp */ + movq 8(%rdx), %rsi +#endif + /* Move RIP to its proper location */ movq MCOUNT_REG_SIZE+\added(%rsp), %rdi movq %rdi, RIP(%rsp) + + /* + * Now %rdi (the first parameter) has the return address of + * where ftrace_call returns. But the callbacks expect the + * the address of the call itself. + */ + subq $MCOUNT_INSN_SIZE, %rdi .endm .macro restore_mcount_regs @@ -122,28 +145,6 @@ .endm -/* skip is set if stack has been adjusted */ -.macro ftrace_caller_setup trace_label added=0 - save_mcount_regs \added - - /* Save this location */ -GLOBAL(\trace_label) - /* Load the ftrace_ops into the 3rd parameter */ - movq function_trace_op(%rip), %rdx - - /* %rdi already has %rip from the save_mcount_regs macro */ - subq $MCOUNT_INSN_SIZE, %rdi - /* Load the parent_ip into the second parameter */ -#ifdef CC_USING_FENTRY - movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi -#else - /* Need to grab the original %rbp */ - movq RBP(%rsp), %rsi - /* Now parent address is 8 above original %rbp */ - movq 8(%rsi), %rsi -#endif -.endm - #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(function_hook) @@ -151,7 +152,13 @@ ENTRY(function_hook) END(function_hook) ENTRY(ftrace_caller) - ftrace_caller_setup ftrace_caller_op_ptr + /* save_mcount_regs fills in first two parameters */ + save_mcount_regs + +GLOBAL(ftrace_caller_op_ptr) + /* Load the ftrace_ops into the 3rd parameter */ + movq function_trace_op(%rip), %rdx + /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx @@ -182,7 +189,12 @@ ENTRY(ftrace_regs_caller) pushfq /* added 8 bytes to save flags */ - ftrace_caller_setup ftrace_regs_caller_op_ptr 8 + save_mcount_regs 8 + /* save_mcount_regs fills in first two parameters */ + +GLOBAL(ftrace_regs_caller_op_ptr) + /* Load the ftrace_ops into the 3rd parameter */ + movq function_trace_op(%rip), %rdx /* Save the rest of pt_regs */ movq %r15, R15(%rsp) @@ -263,7 +275,8 @@ GLOBAL(ftrace_stub) retq trace: - ftrace_caller_setup ftrace_caller_op_ptr + /* save_mcount_regs fills in first two parameters */ + save_mcount_regs call *ftrace_trace_function @@ -276,16 +289,16 @@ END(function_hook) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) + /* Saves rbp into %rdx */ save_mcount_regs #ifdef CC_USING_FENTRY leaq 
MCOUNT_REG_SIZE+8(%rsp), %rdi movq $0, %rdx /* No framepointers needed */ #else - /* Need to grab the original %rbp */ - movq RBP(%rsp), %rdx - /* Now parent address is 8 above original %rbp */ + /* Save address of the return address of traced function */ leaq 8(%rdx), %rdi + /* ftrace does sanity checks against frame pointers */ movq (%rdx), %rdx #endif movq RIP(%rsp), %rsi From 6a06bdbf7f9c669743f58084991ba280f2925586 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 24 Nov 2014 21:00:34 -0500 Subject: [PATCH 50/55] ftrace/fgraph/x86: Have prepare_ftrace_return() take ip as first parameter The function graph helper prepare_ftrace_return(), which does the work of hijacking the parent pointer, takes that parent pointer as its first parameter. If we instead make it the second parameter and have ip (self_addr) as the first, it can use the %rdi that save_mcount_regs already loads. Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1411262304010.3961@nanos Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 4 ++-- arch/x86/kernel/mcount_64.S | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 60881d919432..2142376dc8c6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -871,7 +871,7 @@ static void *addr_from_call(void *ptr) return ptr + MCOUNT_INSN_SIZE + calc.offset; } -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer); /* @@ -964,7 +964,7 @@ int ftrace_disable_ftrace_graph_caller(void) * Hook the return address and push it in the stack of return addrs * in current thread info. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer) { unsigned long old; diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index ddc766efa1f1..94ea120fa21f 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -124,7 +124,7 @@ /* * Now %rdi (the first parameter) has the return address of * where ftrace_call returns. But the callbacks expect the - * the address of the call itself. + * address of the call itself. */ subq $MCOUNT_INSN_SIZE, %rdi .endm @@ -289,21 +289,18 @@ END(function_hook) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - /* Saves rbp into %rdx */ + /* Saves rbp into %rdx and fills first parameter */ save_mcount_regs #ifdef CC_USING_FENTRY - leaq MCOUNT_REG_SIZE+8(%rsp), %rdi + leaq MCOUNT_REG_SIZE+8(%rsp), %rsi movq $0, %rdx /* No framepointers needed */ #else /* Save address of the return address of traced function */ - leaq 8(%rdx), %rdi + leaq 8(%rdx), %rsi /* ftrace does sanity checks against frame pointers */ movq (%rdx), %rdx #endif - movq RIP(%rsp), %rsi - subq $MCOUNT_INSN_SIZE, %rsi - call prepare_ftrace_return restore_mcount_regs From e12c09cf3087b5a184ffeb55ca368e8aa436a3a2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Dec 2014 11:55:36 -0500 Subject: [PATCH 51/55] tracing: Add NOT to filtering logic Ted noticed that he could not filter on an event for a bit being cleared.
That's because the filtering logic only tests event fields with a limited set of comparisons; for bit logic these only include "&", which can test whether a bit is set, but there is no good way to test whether a bit is clear. This adds a way to do: !(field & 2048) which returns true if the bit is not set, and false otherwise. Note, currently !(field1 == 10 && field2 == 15) is not supported. That is, the 'not' only works for direct comparisons, not for the AND and OR logic. Link: http://lkml.kernel.org/r/20141202021912.GA29096@thunk.org Link: http://lkml.kernel.org/r/20141202120430.71979060@gandalf.local.home Acked-by: Alexei Starovoitov Suggested-by: "Theodore Ts'o" Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 7a8c1528e141..e6a33db83856 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -45,6 +45,7 @@ enum filter_op_ids OP_GT, OP_GE, OP_BAND, + OP_NOT, OP_NONE, OP_OPEN_PAREN, }; @@ -67,6 +68,7 @@ static struct filter_op filter_ops[] = { { OP_GT, ">", 5 }, { OP_GE, ">=", 5 }, { OP_BAND, "&", 6 }, + { OP_NOT, "!", 6 }, { OP_NONE, "OP_NONE", 0 }, { OP_OPEN_PAREN, "(", 0 }, }; @@ -85,6 +87,7 @@ enum { FILT_ERR_MISSING_FIELD, FILT_ERR_INVALID_FILTER, FILT_ERR_IP_FIELD_ONLY, + FILT_ERR_ILLEGAL_NOT_OP, }; static char *err_text[] = { @@ -101,6 +104,7 @@ static char *err_text[] = { "Missing field name and/or value", "Meaningless filter expression", "Only 'ip' field is supported for function trace", + "Illegal use of '!'", }; struct opstack_op { @@ -139,6 +143,7 @@ struct pred_stack { int index; }; +/* If not of not match is equal to not of not, then it is a match */ #define DEFINE_COMPARISON_PRED(type) \ static int filter_pred_##type(struct filter_pred *pred, void *event) \ { \ @@ -166,7 +171,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ break; \ } \ \ - return match; \ + return !!match == !pred->not; \ } #define DEFINE_EQUALITY_PRED(size) \ @@ -1028,7 +1033,7 @@ static int init_pred(struct filter_parse_state *ps, } if (pred->op == OP_NE) - pred->not = 1; + pred->not ^= 1; pred->fn = fn; return 0; @@ -1590,6 +1595,17 @@ static int replace_preds(struct ftrace_event_call *call, continue; } + if (elt->op == OP_NOT) { + if (!n_preds || operand1 || operand2) { + parse_error(ps, FILT_ERR_ILLEGAL_NOT_OP, 0); + err = -EINVAL; + goto fail; + } + if (!dry_run) + filter->preds[n_preds - 1].not ^= 1; + continue; + } + if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); err = -ENOSPC; From eabb8980a96cff42ae70cc6ab143f4003f02c874 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Dec 2014 19:08:30 -0500 Subject: [PATCH 52/55] tracing: Allow NOT to filter AND and OR clauses Add support to allow not "!" for and (&&) and or (||) clauses. That is: !(field1 == X && field2 == Y) where the value of the full clause will be negated.
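For readers puzzling over the "!!match == !op->not" form used in these two patches: it is a compact "optionally negate" operation. The standalone userspace sketch below (names invented, not kernel code) prints its full truth table:

#include <stdio.h>

/* mirrors the predicate logic: 'not' flips the result of 'match' */
static int apply_not(int match, int not)
{
	/*
	 * !!match normalizes match to 0/1; !not is 1 when no negation
	 * is requested, so equal values mean "match after optional
	 * negation".
	 */
	return !!match == !not;
}

int main(void)
{
	int match, not;

	for (not = 0; not <= 1; not++)
		for (match = 0; match <= 1; match++)
			printf("match=%d not=%d -> %d\n",
			       match, not, apply_not(match, not));
	return 0;
}

The "^= 1" updates in the patches keep an explicit "!" composable with the implicit negation of "!=": two flips cancel out.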
Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e6a33db83856..ced69da0ff55 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -489,9 +489,10 @@ static int process_ops(struct filter_pred *preds, if (!WARN_ON_ONCE(!pred->fn)) match = pred->fn(pred, rec); if (!!match == type) - return match; + break; } - return match; + /* If not of not match is equal to not of not, then it is a match */ + return !!match == !op->not; } struct filter_match_preds_data { @@ -740,10 +741,10 @@ static int filter_set_pred(struct event_filter *filter, * then this op can be folded. */ if (left->index & FILTER_PRED_FOLD && - (left->op == dest->op || + ((left->op == dest->op && !left->not) || left->left == FILTER_PRED_INVALID) && right->index & FILTER_PRED_FOLD && - (right->op == dest->op || + ((right->op == dest->op && !right->not) || right->left == FILTER_PRED_INVALID)) dest->index |= FILTER_PRED_FOLD; From 3e6fb8e94329270c70ea00867112a2c6f348207f Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 3 Dec 2014 10:39:20 +0900 Subject: [PATCH 53/55] Documentation: describe trace_buf_size parameter more accurately I ran into a panic because too little free memory was left when I booted with the trace_buf_size parameter. After digging into the problem, I found that trace_buf_size is the size of the trace buffer on each cpu rather than the total size of the trace buffer. To prevent other victims like me, change the description of the trace_buf_size parameter to be more accurate. Link: http://lkml.kernel.org/r/1417570760-10620-1-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Joonsoo Kim Signed-off-by: Steven Rostedt --- Documentation/kernel-parameters.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7dbe5ec9d9cd..1d09eb37c562 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3477,7 +3477,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. are saved. trace_buf_size=nn[KMG] - [FTRACE] will set tracing buffer size. + [FTRACE] will set tracing buffer size on each cpu. trace_event=[event-list] [FTRACE] Set and start specified trace events in order From 8e1e1df29d837c589c8b4d7b49864481ff7795b8 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 24 Nov 2014 09:34:19 +0900 Subject: [PATCH 54/55] tracing: Add additional marks to signal very large time deltas Currently, the function graph tracer prints "!" or "+" just before the function execution time to signal function overhead, depending on the time. Some latency tracers also print "!" or "+" just after the time to signal overhead, depending on the interval between events. Although this is usually enough, we sometimes need to signal execution times bigger than 100 microseconds. For example, I used the function graph tracer to detect whether there is any case where exit_mm() takes too much time. I did the following steps in /sys/kernel/debug/tracing. It was easier to detect very large execution times with the patched kernel than with the original one. $ echo exit_mm > set_graph_function $ echo function_graph > current_tracer $ echo > trace $ cat trace_pipe > $LOGFILE ...
(do something and terminate logging) $ grep "\\$" $LOGFILE 3) $ 22082032 us | } /* kernel_map_pages */ 3) $ 22082040 us | } /* free_pages_prepare */ 3) $ 22082113 us | } /* free_hot_cold_page */ 3) $ 22083455 us | } /* free_hot_cold_page_list */ 3) $ 22083895 us | } /* release_pages */ 3) $ 22177873 us | } /* free_pages_and_swap_cache */ 3) $ 22178929 us | } /* unmap_single_vma */ 3) $ 22198885 us | } /* unmap_vmas */ 3) $ 22206949 us | } /* exit_mmap */ 3) $ 22207659 us | } /* mmput */ 3) $ 22207793 us | } /* exit_mm */ And then, it was easy to find out that a schedule-out occurred via sub_preempt_count() within kernel_map_pages(). This patch can be useful for detecting very large function execution times caused either by a problematic function implementation or by scheduling issues. Link: http://lkml.kernel.org/r/1416789259-24038-1-git-send-email-byungchul.park@lge.com Signed-off-by: Byungchul Park Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 10 +++++--- kernel/trace/trace.h | 2 ++ kernel/trace/trace_functions_graph.c | 23 +++---------------- kernel/trace/trace_output.c | 34 ++++++++++++++++++++++++---- 4 files changed, 41 insertions(+), 28 deletions(-) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index f10f5f5d260d..8408e040f06f 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -685,9 +685,11 @@ The above is mostly meaningful for kernel developers. needs to be fixed to be only relative to the same CPU. The marks are determined by the difference between this current trace and the next trace. - '!' - greater than preempt_mark_thresh (default 100) - '+' - greater than 1 microsecond - ' ' - less than or equal to 1 microsecond. + '$' - greater than 1 second + '#' - greater than 1000 microsecond + '!' - greater than 100 microsecond + '+' - greater than 10 microsecond + ' ' - less than or equal to 10 microsecond. The rest is the same as the 'trace' file. @@ -1956,6 +1958,8 @@ want, depending on your needs. + means that the function exceeded 10 usecs. ! means that the function exceeded 100 usecs. + # means that the function exceeded 1000 usecs. + $ means that the function exceeded 1 sec. - The task/pid field displays the thread cmdline and pid which diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c3a37e55ec8b..3255dfb054a0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -708,6 +708,8 @@ enum print_line_t print_trace_line(struct trace_iterator *iter); extern unsigned long trace_flags; +extern char trace_find_mark(unsigned long long duration); + /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 100288d10e1f..6c2ab955018c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -730,8 +730,6 @@ static void print_graph_duration(unsigned long long duration, struct trace_seq *s, u32 flags) { - bool duration_printed = false; - if (!(flags & TRACE_GRAPH_PRINT_DURATION) || !(trace_flags & TRACE_ITER_CONTEXT_INFO)) return; @@ -750,24 +748,9 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, } /* Signal a overhead of time execution to the output */ - if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { - /* Duration exceeded 100 usecs */ - if (duration > 100000ULL) { - trace_seq_puts(s, "! 
"); - duration_printed = true; - - /* Duration exceeded 10 usecs */ - } else if (duration > 10000ULL) { - trace_seq_puts(s, "+ "); - duration_printed = true; - } - } - - /* - * If we did not exceed the duration tresholds or we dont want - * to print out the overhead. Either way we need to fill out the space. - */ - if (!duration_printed) + if (flags & TRACE_GRAPH_PRINT_OVERHEAD) + trace_seq_printf(s, "%c ", trace_find_mark(duration)); + else trace_seq_puts(s, " "); trace_print_graph_duration(duration, s); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 723818bc83b4..b77b9a697619 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -115,7 +115,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, if (ret == (const char *)(trace_seq_buffer_ptr(p))) trace_seq_printf(p, "0x%lx", val); - + trace_seq_putc(p, 0); return ret; @@ -443,7 +443,32 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) return trace_print_lat_fmt(s, entry); } -static unsigned long preempt_mark_thresh_us = 100; +#undef MARK +#define MARK(v, s) {.val = v, .sym = s} +/* trace overhead mark */ +static const struct trace_mark { + unsigned long long val; /* unit: nsec */ + char sym; +} mark[] = { + MARK(1000000000ULL , '$'), /* 1 sec */ + MARK(1000000ULL , '#'), /* 1000 usecs */ + MARK(100000ULL , '!'), /* 100 usecs */ + MARK(10000ULL , '+'), /* 10 usecs */ +}; +#undef MARK + +char trace_find_mark(unsigned long long d) +{ + int i; + int size = ARRAY_SIZE(mark); + + for (i = 0; i < size; i++) { + if (d >= mark[i].val) + break; + } + + return (i == size) ? ' ' : mark[i].sym; +} static int lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) @@ -480,8 +505,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) trace_seq_printf( s, " %4lldus%c: ", abs_ts, - rel_ts > preempt_mark_thresh_us ? '!' : - rel_ts > 1 ? '+' : ' '); + trace_find_mark(rel_ts * NSEC_PER_USEC)); } else { /* !verbose && !in_ns */ trace_seq_printf(s, " %4lld: ", abs_ts); @@ -663,7 +687,7 @@ int register_ftrace_event(struct trace_event *event) goto out; } else { - + event->type = next_event_type++; list = &ftrace_event_list; } From 3558a5ac50dbb2419cc649d5e154af161d661037 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 27 Nov 2014 18:57:52 +0300 Subject: [PATCH 55/55] tracing: Truncated output is better than nothing The initial reason for this patch is that I noticed that: if (len > TRACE_BUF_SIZE) is off by one. In this code, if len == TRACE_BUF_SIZE, then it means we have truncated the last character off the output string. If we truncate two or more characters then we exit without printing. After some discussion, we decided that printing truncated data is better than not printing at all so we should just use vscnprintf() and remove the test entirely. Also I have updated memcpy() to copy the NUL char instead of setting the NUL in a separate step. 
Link: http://lkml.kernel.org/r/20141127155752.GA21914@mwanda Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3ce3c4ccfc94..4ceb2546c7ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2158,9 +2158,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, goto out; } - len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - if (len > TRACE_BUF_SIZE) - goto out; + len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); local_save_flags(flags); size = sizeof(*entry) + len + 1; @@ -2171,8 +2169,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, entry = ring_buffer_event_data(event); entry->ip = ip; - memcpy(&entry->buf, tbuffer, len); - entry->buf[len] = '\0'; + memcpy(&entry->buf, tbuffer, len + 1); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc);
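To see the off-by-one that motivated the last patch, note that C99 snprintf() shares the return convention of the kernel's vsnprintf(): it returns the length the output would have had, not what was actually stored. A small userspace sketch (the buffer size and string are arbitrary choices for illustration):

#include <stdio.h>

int main(void)
{
	char buf[8];
	/* "12345678" needs 8 chars plus NUL; only "1234567" plus NUL fits */
	int len = snprintf(buf, sizeof(buf), "%s", "12345678");

	/*
	 * len == 8 == sizeof(buf): the output was truncated, yet the
	 * old test 'if (len > TRACE_BUF_SIZE)' would not catch it.
	 */
	printf("len=%d buf=\"%s\"\n", len, buf);
	return 0;
}

vscnprintf() instead returns the number of characters actually placed in the buffer, so the (possibly truncated) string plus its NUL can be copied out directly, which is exactly what the patch does.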