From 40ed29b373381532ef222e509c5aa69a1d8561ea Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sun, 23 Sep 2018 12:11:33 +0000 Subject: [PATCH 01/44] ring-buffer: Fix ring buffer size in rb_write_something() 'cnt' should be used to calculate ring buffer size rather than data->cnt Link: http://lkml.kernel.org/r/1537704693-184237-1-git-send-email-yuehaibing@huawei.com Signed-off-by: YueHaibing Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41b6f96e5366..4f33d7d841af 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4979,7 +4979,7 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested) cnt = data->cnt + (nested ? 27 : 0); /* Multiply cnt by ~e, to make some unique increment */ - size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1); + size = (cnt * 68 / 25) % (sizeof(rb_string) - 1); len = size + sizeof(struct rb_item); From f45d1225adb0479478cee989e2ae2d7d2c62b31b Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Wed, 20 Mar 2019 11:28:51 -0700 Subject: [PATCH 02/44] tracing: Kernel access to Ftrace instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ftrace provides the feature “instances” that provides the capability to create multiple Ftrace ring buffers. However, currently these buffers are created/accessed via userspace only. The kernel APIs providing these features are not exported, hence cannot be used by other kernel components. This patch aims to extend this infrastructure to provide the flexibility to create/log/remove/ enable-disable existing trace events to these buffers from within the kernel. Link: http://lkml.kernel.org/r/1553106531-3281-2-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Reviewed-by: Joe Jin Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 74 +++++++++++++++++++++++++------------ kernel/trace/trace_events.c | 1 + 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21153e64bf1c..4384fcc386c8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3053,6 +3053,7 @@ void trace_printk_init_buffers(void) if (global_trace.trace_buffer.buffer) tracing_start_cmdline_record(); } +EXPORT_SYMBOL_GPL(trace_printk_init_buffers); void trace_printk_start_comm(void) { @@ -3213,6 +3214,7 @@ int trace_array_printk(struct trace_array *tr, va_end(ap); return ret; } +EXPORT_SYMBOL_GPL(trace_array_printk); __printf(3, 4) int trace_array_printk_buf(struct ring_buffer *buffer, @@ -8037,7 +8039,7 @@ static void update_tracer_options(struct trace_array *tr) mutex_unlock(&trace_types_lock); } -static int instance_mkdir(const char *name) +struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; int ret; @@ -8101,7 +8103,7 @@ static int instance_mkdir(const char *name) mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); - return 0; + return tr; out_free_tr: free_trace_buffers(tr); @@ -8113,33 +8115,21 @@ static int instance_mkdir(const char *name) mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); - return ret; + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(trace_array_create); +static int instance_mkdir(const char *name) +{ + return PTR_ERR_OR_ZERO(trace_array_create(name)); } -static int instance_rmdir(const char *name) +static int __remove_instance(struct trace_array *tr) { - struct trace_array *tr; - int found = 0; - int ret; int i; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); - - ret = -ENODEV; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) { - found = 1; - break; - } - } - if (!found) - goto out_unlock; - - ret = -EBUSY; if (tr->ref || (tr->current_trace && tr->current_trace->ref)) - goto out_unlock; + return -EBUSY; list_del(&tr->list); @@ -8165,10 +8155,46 @@ static int instance_rmdir(const char *name) free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); + tr = NULL; - ret = 0; + return 0; +} + +int trace_array_destroy(struct trace_array *tr) +{ + int ret; + + if (!tr) + return -EINVAL; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = __remove_instance(tr); + + mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(trace_array_destroy); + +static int instance_rmdir(const char *name) +{ + struct trace_array *tr; + int ret; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = -ENODEV; + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, name) == 0) { + ret = __remove_instance(tr); + break; + } + } - out_unlock: mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5b3b0c3c8a47..81c038ed6cee 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -832,6 +832,7 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) return ret; } +EXPORT_SYMBOL_GPL(ftrace_set_clr_event); /** * trace_set_clr_event - enable or disable an event From 8a062902be725f647dc8da532b04d836546a369a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:15 -0500 Subject: [PATCH 03/44] tracing: Add tracing error log Introduce a new ftrace file, tracing/error_log, for ftrace commands to log errors. This is useful for allowing more complex commands such as hist trigger and kprobe_event commands to point out specifically where something may have gone wrong without forcing them to resort to more ad hoc methods such as tacking error messages onto existing output files. To log a tracing error, call the event_log_err() function, passing it a location string describing where it came from e.g. kprobe_events or system:event, the command that caused the error, an array of static error strings describing errors and an index within that array which describes the specific error, along with the position to place the error caret. Reading the log displays the last (currently) 8 errors logged in the following format: [timestamp] : error: Command: ^ Memory for the error log isn't allocated unless there has been a trace event error, and the error log can be cleared and have its memory freed by writing the empty string in truncation mode to it: # echo > tracing/error_log. Link: http://lkml.kernel.org/r/0c2c82571fd38c5f3a88ca823627edff250e9416.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Suggested-by: Masami Hiramatsu Improvements-suggested-by: Steve Rostedt Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 218 +++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 4 + 2 files changed, 222 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4384fcc386c8..7978168f5041 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6878,6 +6878,221 @@ static const struct file_operations snapshot_raw_fops = { #endif /* CONFIG_TRACER_SNAPSHOT */ +#define TRACING_LOG_ERRS_MAX 8 +#define TRACING_LOG_LOC_MAX 128 + +#define CMD_PREFIX " Command: " + +struct err_info { + const char **errs; /* ptr to loc-specific array of err strings */ + u8 type; /* index into errs -> specific err string */ + u8 pos; /* MAX_FILTER_STR_VAL = 256 */ + u64 ts; +}; + +struct tracing_log_err { + struct list_head list; + struct err_info info; + char loc[TRACING_LOG_LOC_MAX]; /* err location */ + char cmd[MAX_FILTER_STR_VAL]; /* what caused err */ +}; + +static LIST_HEAD(tracing_err_log); +static DEFINE_MUTEX(tracing_err_log_lock); + +static unsigned int n_tracing_err_log_entries; + +struct tracing_log_err *get_tracing_log_err(void) +{ + struct tracing_log_err *err; + + if (n_tracing_err_log_entries < TRACING_LOG_ERRS_MAX) { + err = kzalloc(sizeof(*err), GFP_KERNEL); + if (!err) + err = ERR_PTR(-ENOMEM); + n_tracing_err_log_entries++; + + return err; + } + + err = list_first_entry(&tracing_err_log, struct tracing_log_err, list); + list_del(&err->list); + + return err; +} + +/** + * err_pos - find the position of a string within a command for error careting + * @cmd: The tracing command that caused the error + * @str: The string to position the caret at within @cmd + * + * Finds the position of the first occurence of @str within @cmd. The + * return value can be passed to tracing_log_err() for caret placement + * within @cmd. + * + * Returns the index within @cmd of the first occurence of @str or 0 + * if @str was not found. + */ +unsigned int err_pos(char *cmd, const char *str) +{ + char *found; + + if (WARN_ON(!strlen(cmd))) + return 0; + + found = strstr(cmd, str); + if (found) + return found - cmd; + + return 0; +} + +/** + * tracing_log_err - write an error to the tracing error log + * @loc: A string describing where the error occurred + * @cmd: The tracing command that caused the error + * @errs: The array of loc-specific static error strings + * @type: The index into errs[], which produces the specific static err string + * @pos: The position the caret should be placed in the cmd + * + * Writes an error into tracing/error_log of the form: + * + * : error: + * Command: + * ^ + * + * tracing/error_log is a small log file containing the last + * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated + * unless there has been a tracing error, and the error log can be + * cleared and have its memory freed by writing the empty string in + * truncation mode to it i.e. echo > tracing/error_log. + * + * NOTE: the @errs array along with the @type param are used to + * produce a static error string - this string is not copied and saved + * when the error is logged - only a pointer to it is saved. See + * existing callers for examples of how static strings are typically + * defined for use with tracing_log_err(). + */ +void tracing_log_err(const char *loc, const char *cmd, + const char **errs, u8 type, u8 pos) +{ + struct tracing_log_err *err; + + mutex_lock(&tracing_err_log_lock); + err = get_tracing_log_err(); + if (PTR_ERR(err) == -ENOMEM) { + mutex_unlock(&tracing_err_log_lock); + return; + } + + snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); + snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd); + + err->info.errs = errs; + err->info.type = type; + err->info.pos = pos; + err->info.ts = local_clock(); + + list_add_tail(&err->list, &tracing_err_log); + mutex_unlock(&tracing_err_log_lock); +} + +static void clear_tracing_err_log(void) +{ + struct tracing_log_err *err, *next; + + mutex_lock(&tracing_err_log_lock); + list_for_each_entry_safe(err, next, &tracing_err_log, list) { + list_del(&err->list); + kfree(err); + } + + n_tracing_err_log_entries = 0; + mutex_unlock(&tracing_err_log_lock); +} + +static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&tracing_err_log_lock); + + return seq_list_start(&tracing_err_log, *pos); +} + +static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &tracing_err_log, pos); +} + +static void tracing_err_log_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&tracing_err_log_lock); +} + +static void tracing_err_log_show_pos(struct seq_file *m, u8 pos) +{ + u8 i; + + for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) + seq_putc(m, ' '); + for (i = 0; i < pos; i++) + seq_putc(m, ' '); + seq_puts(m, "^\n"); +} + +static int tracing_err_log_seq_show(struct seq_file *m, void *v) +{ + struct tracing_log_err *err = v; + + if (err) { + const char *err_text = err->info.errs[err->info.type]; + u64 sec = err->info.ts; + u32 nsec; + + nsec = do_div(sec, NSEC_PER_SEC); + seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, + err->loc, err_text); + seq_printf(m, "%s", err->cmd); + tracing_err_log_show_pos(m, err->info.pos); + } + + return 0; +} + +static const struct seq_operations tracing_err_log_seq_ops = { + .start = tracing_err_log_seq_start, + .next = tracing_err_log_seq_next, + .stop = tracing_err_log_seq_stop, + .show = tracing_err_log_seq_show +}; + +static int tracing_err_log_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + /* If this file was opened for write, then erase contents */ + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) + clear_tracing_err_log(); + + if (file->f_mode & FMODE_READ) + ret = seq_open(file, &tracing_err_log_seq_ops); + + return ret; +} + +static ssize_t tracing_err_log_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + return count; +} + +static const struct file_operations tracing_err_log_fops = { + .open = tracing_err_log_open, + .write = tracing_err_log_write, + .read = seq_read, + .llseek = seq_lseek, +}; + static int tracing_buffers_open(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; @@ -8284,6 +8499,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) tr, &snapshot_fops); #endif + trace_create_file("error_log", 0644, d_tracer, + tr, &tracing_err_log_fops); + for_each_tracing_cpu(cpu) tracing_init_tracefs_percpu(tr, cpu); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d80cee49e0eb..b711edbef7e7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1884,6 +1884,10 @@ extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char**)); +extern unsigned int err_pos(char *cmd, const char *str); +extern void tracing_log_err(const char *loc, const char *cmd, + const char **errs, u8 type, u8 pos); + /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats From a1a05bb40e229d148c071fcd2ed787b21f61ff8b Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:16 -0500 Subject: [PATCH 04/44] tracing: Save the last hist command's associated event name In preparation for making use of the new trace error log, save the subsystem and event name associated with the last hist command - it will be passed as the location param in the event_log_err() calls. Link: http://lkml.kernel.org/r/eb0fd1362be8f39facb86c83eecf441b7a5876f8.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 33 ++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 795aa2038377..0de702bf148f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -535,15 +535,34 @@ static struct track_data *track_data_alloc(unsigned int key_len, return data; } -static char last_hist_cmd[MAX_FILTER_STR_VAL]; +static char last_cmd[MAX_FILTER_STR_VAL]; +static char last_cmd_loc[MAX_FILTER_STR_VAL]; + static char hist_err_str[MAX_FILTER_STR_VAL]; -static void last_cmd_set(char *str) +static void last_cmd_set(struct trace_event_file *file, char *str) { + const char *system = NULL, *name = NULL; + struct trace_event_call *call; + if (!str) return; - strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1); + strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1); + + if (file) { + call = file->event_call; + + system = call->class->system; + if (system) { + name = trace_event_name(call); + if (!name) + system = NULL; + } + } + + if (system) + snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name); } static void hist_err(char *str, char *var) @@ -583,6 +602,8 @@ static void hist_err_event(char *str, char *system, char *event, char *var) static void hist_err_clear(void) { hist_err_str[0] = '\0'; + last_cmd[0] = '\0'; + last_cmd_loc[0] = '\0'; } static bool have_hist_err(void) @@ -5438,8 +5459,8 @@ static int hist_show(struct seq_file *m, void *v) } if (have_hist_err()) { - seq_printf(m, "\nERROR: %s\n", hist_err_str); - seq_printf(m, " Last command: %s\n", last_hist_cmd); + seq_printf(m, "\n%s: error: \n", hist_err_str); + seq_printf(m, " Last command: %s\n", last_cmd); } out_unlock: @@ -6043,8 +6064,8 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, lockdep_assert_held(&event_mutex); if (glob && strlen(glob)) { - last_cmd_set(param); hist_err_clear(); + last_cmd_set(file, param); } if (!param) From d566c5e9d1bad6773fe9cce3d4514cca2cc32e4e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:17 -0500 Subject: [PATCH 05/44] tracing: Use tracing error_log with hist triggers Replace hist_err() and hist_err_event() with tracing_log_err() from the new tracing error_log mechanism. Also add a couple related helper functions and remove most of the old hist_err()-related code. With this change, users no longer read the hist files for hist trigger error information, but instead look at tracing/error_log for the same information. Link: http://lkml.kernel.org/r/c98f77a97c9715d18b623eeb5741057b330d5ac0.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 206 ++++++++++++++++--------------- 1 file changed, 104 insertions(+), 102 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0de702bf148f..071c62cacba7 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -22,6 +22,57 @@ #define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ +#define ERRORS \ + C(NONE, "No error"), \ + C(DUPLICATE_VAR, "Variable already defined"), \ + C(VAR_NOT_UNIQUE, "Variable name not unique, need to use fully qualified name (subsys.event.var) for variable"), \ + C(TOO_MANY_VARS, "Too many variables defined"), \ + C(MALFORMED_ASSIGNMENT, "Malformed assignment"), \ + C(NAMED_MISMATCH, "Named hist trigger doesn't match existing named trigger (includes variables)"), \ + C(TRIGGER_EEXIST, "Hist trigger already exists"), \ + C(TRIGGER_ENOENT_CLEAR, "Can't clear or continue a nonexistent hist trigger"), \ + C(SET_CLOCK_FAIL, "Couldn't set trace_clock"), \ + C(BAD_FIELD_MODIFIER, "Invalid field modifier"), \ + C(TOO_MANY_SUBEXPR, "Too many subexpressions (3 max)"), \ + C(TIMESTAMP_MISMATCH, "Timestamp units in expression don't match"), \ + C(TOO_MANY_FIELD_VARS, "Too many field variables defined"), \ + C(EVENT_FILE_NOT_FOUND, "Event file not found"), \ + C(HIST_NOT_FOUND, "Matching event histogram not found"), \ + C(HIST_CREATE_FAIL, "Couldn't create histogram for field"), \ + C(SYNTH_VAR_NOT_FOUND, "Couldn't find synthetic variable"), \ + C(SYNTH_EVENT_NOT_FOUND,"Couldn't find synthetic event"), \ + C(SYNTH_TYPE_MISMATCH, "Param type doesn't match synthetic event field type"), \ + C(SYNTH_COUNT_MISMATCH, "Param count doesn't match synthetic event field count"), \ + C(FIELD_VAR_PARSE_FAIL, "Couldn't parse field variable"), \ + C(VAR_CREATE_FIND_FAIL, "Couldn't create or find variable"), \ + C(ONX_NOT_VAR, "For onmax(x) or onchange(x), x must be a variable"), \ + C(ONX_VAR_NOT_FOUND, "Couldn't find onmax or onchange variable"), \ + C(ONX_VAR_CREATE_FAIL, "Couldn't create onmax or onchange variable"), \ + C(FIELD_VAR_CREATE_FAIL,"Couldn't create field variable"), \ + C(TOO_MANY_PARAMS, "Too many action params"), \ + C(PARAM_NOT_FOUND, "Couldn't find param"), \ + C(INVALID_PARAM, "Invalid action param"), \ + C(ACTION_NOT_FOUND, "No action found"), \ + C(NO_SAVE_PARAMS, "No params found for save()"), \ + C(TOO_MANY_SAVE_ACTIONS,"Can't have more than one save() action per hist"), \ + C(ACTION_MISMATCH, "Handler doesn't support action"), \ + C(NO_CLOSING_PAREN, "No closing paren found"), \ + C(SUBSYS_NOT_FOUND, "Missing subsystem"), \ + C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \ + C(INVALID_REF_KEY, "Using variable references as keys not supported"), \ + C(VAR_NOT_FOUND, "Couldn't find variable"), \ + C(FIELD_NOT_FOUND, "Couldn't find field"), + +#undef C +#define C(a, b) HIST_ERR_##a + +enum { ERRORS }; + +#undef C +#define C(a, b) b + +static const char *err_text[] = { ERRORS }; + struct hist_field; typedef u64 (*hist_field_fn_t) (struct hist_field *field, @@ -538,7 +589,10 @@ static struct track_data *track_data_alloc(unsigned int key_len, static char last_cmd[MAX_FILTER_STR_VAL]; static char last_cmd_loc[MAX_FILTER_STR_VAL]; -static char hist_err_str[MAX_FILTER_STR_VAL]; +static int errpos(char *str) +{ + return err_pos(last_cmd, str); +} static void last_cmd_set(struct trace_event_file *file, char *str) { @@ -565,55 +619,17 @@ static void last_cmd_set(struct trace_event_file *file, char *str) snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name); } -static void hist_err(char *str, char *var) +static void hist_err(u8 err_type, u8 err_pos) { - int maxlen = MAX_FILTER_STR_VAL - 1; - - if (!str) - return; - - if (strlen(hist_err_str)) - return; - - if (!var) - var = ""; - - if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen) - return; - - strcat(hist_err_str, str); - strcat(hist_err_str, var); -} - -static void hist_err_event(char *str, char *system, char *event, char *var) -{ - char err[MAX_FILTER_STR_VAL]; - - if (system && var) - snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var); - else if (system) - snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); - else - strscpy(err, var, MAX_FILTER_STR_VAL); - - hist_err(str, err); + tracing_log_err(last_cmd_loc, last_cmd, err_text, err_type, err_pos); } static void hist_err_clear(void) { - hist_err_str[0] = '\0'; last_cmd[0] = '\0'; last_cmd_loc[0] = '\0'; } -static bool have_hist_err(void) -{ - if (strlen(hist_err_str)) - return true; - - return false; -} - struct synth_trace_event { struct trace_entry ent; u64 fields[]; @@ -1740,7 +1756,7 @@ static struct trace_event_file *find_var_file(struct trace_array *tr, if (find_var_field(var_hist_data, var_name)) { if (found) { - hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); return NULL; } @@ -1791,7 +1807,7 @@ find_match_var(struct hist_trigger_data *hist_data, char *var_name) hist_field = find_file_var(file, var_name); if (hist_field) { if (found) { - hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); return ERR_PTR(-EINVAL); } @@ -2023,7 +2039,6 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) attrs->n_actions++; ret = 0; } - return ret; } @@ -2083,7 +2098,7 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) char *assignment; if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { - hist_err("Too many variables defined: ", str); + hist_err(HIST_ERR_TOO_MANY_VARS, errpos(str)); ret = -EINVAL; goto out; } @@ -2681,8 +2696,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, system, event_name); if (!ref_field) - hist_err_event("Couldn't find variable: $", - system, event_name, var_name); + hist_err(HIST_ERR_VAR_NOT_FOUND, errpos(var_name)); return ref_field; } @@ -2716,7 +2730,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else if (strcmp(modifier, "usecs") == 0) *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; else { - hist_err("Invalid field modifier: ", modifier); + hist_err(HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); field = ERR_PTR(-EINVAL); goto out; } @@ -2732,7 +2746,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err("Couldn't find field: ", field_name); + hist_err(HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); field = ERR_PTR(-EINVAL); goto out; } @@ -2843,7 +2857,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, /* we support only -(xxx) i.e. explicit parens required */ if (level > 3) { - hist_err("Too many subexpressions (3 max): ", str); + hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; } @@ -2926,7 +2940,7 @@ static int check_expr_operands(struct hist_field *operand1, if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { - hist_err("Timestamp units in expression don't match", NULL); + hist_err(HIST_ERR_TIMESTAMP_MISMATCH, 0); return -EINVAL; } @@ -2944,7 +2958,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, char *sep, *operand1_str; if (level > 3) { - hist_err("Too many subexpressions (3 max): ", str); + hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } @@ -3182,16 +3196,14 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, int ret; if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { - hist_err_event("trace action: Too many field variables defined: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); return ERR_PTR(-EINVAL); } file = event_file(tr, subsys_name, event_name); if (IS_ERR(file)) { - hist_err_event("trace action: Event file not found: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name)); ret = PTR_ERR(file); return ERR_PTR(ret); } @@ -3204,8 +3216,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, */ hist_data = find_compatible_hist(target_hist_data, file); if (!hist_data) { - hist_err_event("trace action: Matching event histogram not found: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_HIST_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3266,8 +3277,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, kfree(cmd); kfree(var_hist->cmd); kfree(var_hist); - hist_err_event("trace action: Couldn't create histogram for field: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_HIST_CREATE_FAIL, errpos(field_name)); return ERR_PTR(ret); } @@ -3279,8 +3289,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, if (IS_ERR_OR_NULL(event_var)) { kfree(var_hist->cmd); kfree(var_hist); - hist_err_event("trace action: Couldn't find synthetic variable: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3417,21 +3426,21 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, int ret = 0; if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { - hist_err("Too many field variables defined: ", field_name); + hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); ret = -EINVAL; goto err; } val = parse_atom(hist_data, file, field_name, &flags, NULL); if (IS_ERR(val)) { - hist_err("Couldn't parse field variable: ", field_name); + hist_err(HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name)); ret = PTR_ERR(val); goto err; } var = create_var(hist_data, file, field_name, val->size, val->type); if (IS_ERR(var)) { - hist_err("Couldn't create or find variable: ", field_name); + hist_err(HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); kfree(val); ret = PTR_ERR(var); goto err; @@ -3763,14 +3772,14 @@ static int track_data_create(struct hist_trigger_data *hist_data, track_data_var_str = data->track_data.var_str; if (track_data_var_str[0] != '$') { - hist_err("For onmax(x) or onchange(x), x must be a variable: ", track_data_var_str); + hist_err(HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str)); return -EINVAL; } track_data_var_str++; var_field = find_target_event_var(hist_data, NULL, NULL, track_data_var_str); if (!var_field) { - hist_err("Couldn't find onmax or onchange variable: ", track_data_var_str); + hist_err(HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str)); return -EINVAL; } @@ -3783,7 +3792,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONMAX) track_var = create_var(hist_data, file, "__max", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err("Couldn't create onmax variable: ", "__max"); + hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3791,7 +3800,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONCHANGE) track_var = create_var(hist_data, file, "__change", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err("Couldn't create onchange variable: ", "__change"); + hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3810,20 +3819,20 @@ static int parse_action_params(char *params, struct action_data *data) while (params) { if (data->n_params >= SYNTH_FIELDS_MAX) { - hist_err("Too many action params", ""); + hist_err(HIST_ERR_TOO_MANY_PARAMS, 0); goto out; } param = strsep(¶ms, ","); if (!param) { - hist_err("No action param found", ""); + hist_err(HIST_ERR_PARAM_NOT_FOUND, 0); ret = -EINVAL; goto out; } param = strstrip(param); if (strlen(param) < 2) { - hist_err("Invalid action param: ", param); + hist_err(HIST_ERR_INVALID_PARAM, errpos(param)); ret = -EINVAL; goto out; } @@ -3855,14 +3864,14 @@ static int action_parse(char *str, struct action_data *data, strsep(&str, "."); if (!str) { - hist_err("action parsing: No action found", ""); + hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } action_name = strsep(&str, "("); if (!action_name || !str) { - hist_err("action parsing: No action found", ""); + hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } @@ -3871,7 +3880,7 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!params) { - hist_err("action parsing: No params found for %s", "save"); + hist_err(HIST_ERR_NO_SAVE_PARAMS, 0); ret = -EINVAL; goto out; } @@ -3885,7 +3894,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err("action parsing: Handler doesn't support action: ", action_name); + hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3897,7 +3906,7 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!str) { - hist_err("action parsing: No closing paren found: %s", params); + hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(params)); ret = -EINVAL; goto out; } @@ -3907,7 +3916,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err("action parsing: Handler doesn't support action: ", action_name); + hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -4060,7 +4069,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, } if (!hist_field) - hist_err_event("trace action: Couldn't find param: $", system, event, var); + hist_err(HIST_ERR_PARAM_NOT_FOUND, errpos(var)); return hist_field; } @@ -4135,7 +4144,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event = find_synth_event(synth_event_name); if (!event) { - hist_err("trace action: Couldn't find synthetic event: ", synth_event_name); + hist_err(HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name)); return -EINVAL; } @@ -4196,15 +4205,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, continue; } - hist_err_event("trace action: Param type doesn't match synthetic event field type: ", - system, event_name, param); + hist_err(HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param)); kfree(p); ret = -EINVAL; goto err; } if (field_pos != event->n_fields) { - hist_err("trace action: Param count doesn't match synthetic event field count: ", event->name); + hist_err(HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name)); ret = -EINVAL; goto err; } @@ -4250,7 +4258,7 @@ static int action_create(struct hist_trigger_data *hist_data, if (data->action == ACTION_SAVE) { if (hist_data->n_save_vars) { ret = -EEXIST; - hist_err("save action: Can't have more than one save() action per hist", ""); + hist_err(HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0); goto out; } @@ -4263,7 +4271,7 @@ static int action_create(struct hist_trigger_data *hist_data, field_var = create_target_field_var(hist_data, NULL, NULL, param); if (IS_ERR(field_var)) { - hist_err("save action: Couldn't create field variable: ", param); + hist_err(HIST_ERR_FIELD_VAR_CREATE_FAIL, errpos(param)); ret = PTR_ERR(field_var); kfree(param); goto out; @@ -4297,19 +4305,18 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) match_event = strsep(&str, ")"); if (!match_event || !str) { - hist_err("onmatch: Missing closing paren: ", match_event); + hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(match_event)); goto free; } match_event_system = strsep(&match_event, "."); if (!match_event) { - hist_err("onmatch: Missing subsystem for match event: ", match_event_system); + hist_err(HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system)); goto free; } if (IS_ERR(event_file(tr, match_event_system, match_event))) { - hist_err_event("onmatch: Invalid subsystem or event name: ", - match_event_system, match_event, NULL); + hist_err(HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event)); goto free; } @@ -4400,7 +4407,7 @@ static int create_var_field(struct hist_trigger_data *hist_data, return -EINVAL; if (find_var(hist_data, file, var_name) && !hist_data->remove) { - hist_err("Variable already defined: ", var_name); + hist_err(HIST_ERR_DUPLICATE_VAR, errpos(var_name)); return -EINVAL; } @@ -4481,7 +4488,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { - hist_err("Using variable references as keys not supported: ", field_str); + hist_err(HIST_ERR_INVALID_REF_KEY, errpos(field_str)); destroy_hist_field(hist_field, 0); ret = -EINVAL; goto out; @@ -4595,13 +4602,13 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) var_name = strsep(&field_str, "="); if (!var_name || !field_str) { - hist_err("Malformed assignment: ", var_name); + hist_err(HIST_ERR_MALFORMED_ASSIGNMENT, errpos(var_name)); ret = -EINVAL; goto free; } if (n_vars == TRACING_MAP_VARS_MAX) { - hist_err("Too many variables defined: ", var_name); + hist_err(HIST_ERR_TOO_MANY_VARS, errpos(var_name)); ret = -EINVAL; goto free; } @@ -5458,11 +5465,6 @@ static int hist_show(struct seq_file *m, void *v) hist_trigger_show(m, data, n++); } - if (have_hist_err()) { - seq_printf(m, "\n%s: error: \n", hist_err_str); - seq_printf(m, " Last command: %s\n", last_cmd); - } - out_unlock: mutex_unlock(&event_mutex); @@ -5834,7 +5836,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (named_data) { if (!hist_trigger_match(data, named_data, named_data, true)) { - hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name); + hist_err(HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name)); ret = -EINVAL; goto out; } @@ -5855,7 +5857,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, else if (hist_data->attrs->clear) hist_clear(test); else { - hist_err("Hist trigger already exists", NULL); + hist_err(HIST_ERR_TRIGGER_EEXIST, 0); ret = -EEXIST; } goto out; @@ -5863,7 +5865,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, } new: if (hist_data->attrs->cont || hist_data->attrs->clear) { - hist_err("Can't clear or continue a nonexistent hist trigger", NULL); + hist_err(HIST_ERR_TRIGGER_ENOENT_CLEAR, 0); ret = -ENOENT; goto out; } @@ -5888,7 +5890,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, ret = tracing_set_clock(file->tr, hist_data->attrs->clock); if (ret) { - hist_err("Couldn't set trace_clock: ", clock); + hist_err(HIST_ERR_SET_CLOCK_FAIL, errpos(clock)); goto out; } From 34f76afaac7a437a2ce381225135563928b359dd Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:18 -0500 Subject: [PATCH 06/44] tracing: Use tracing error_log with trace event filters Use tracing_log_err() from the new tracing error_log mechanism to send filter parse errors to tracing/error_log. With this change, users will be able to see filter errors by looking at tracing/error_log. The same errors will also be available in the filter file, as expected. Link: http://lkml.kernel.org/r/1d942c419941539a11d78a6810fc5740a99b2974.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 05a66493a164..290d42c59101 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -66,7 +66,8 @@ static const char * ops[] = { OPS }; C(INVALID_FILTER, "Meaningless filter expression"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ - C(NO_FILTER, "No filter found"), + C(ERRNO, "Error"), \ + C(NO_FILTER, "No filter found") #undef C #define C(a, b) FILT_ERR_##a @@ -76,7 +77,7 @@ enum { ERRORS }; #undef C #define C(a, b) b -static char *err_text[] = { ERRORS }; +static const char *err_text[] = { ERRORS }; /* Called after a '!' character but "!=" and "!~" are not "not"s */ static bool is_not(const char *str) @@ -947,8 +948,14 @@ static void append_filter_err(struct filter_parse_error *pe, if (pe->lasterr > 0) { trace_seq_printf(s, "\n%*s", pos, "^"); trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); + tracing_log_err("event filter parse error", + filter->filter_string, err_text, + pe->lasterr, pe->lasterr_pos); } else { trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); + tracing_log_err("event filter parse error", + filter->filter_string, err_text, + FILT_ERR_ERRNO, 0); } trace_seq_putc(s, 0); buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL); From ab105a4fb89496c71c5a0f3222347c506c30feb0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 31 Mar 2019 18:48:19 -0500 Subject: [PATCH 07/44] tracing: Use tracing error_log with probe events Use tracing error_log with probe events for logging error more precisely. This also makes all parse error returns -EINVAL (except for -ENOMEM), because user can see better error message in error_log file now. Link: http://lkml.kernel.org/r/6a4d90e141d138040ea61f4776b991597077451e.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 77 ++++++---- kernel/trace/trace_probe.c | 274 ++++++++++++++++++++++++++---------- kernel/trace/trace_probe.h | 77 +++++++++- kernel/trace/trace_uprobe.c | 44 ++++-- 4 files changed, 348 insertions(+), 124 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5d5129b05df7..7d736248a070 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -441,13 +441,8 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) else ret = register_kprobe(&tk->rp.kp); - if (ret == 0) { + if (ret == 0) tk->tp.flags |= TP_FLAG_REGISTERED; - } else if (ret == -EILSEQ) { - pr_warn("Probing address(0x%p) is not an instruction boundary.\n", - tk->rp.kp.addr); - ret = -EINVAL; - } return ret; } @@ -591,7 +586,7 @@ static int trace_kprobe_create(int argc, const char *argv[]) * Type of args: * FETCHARG:TYPE : use TYPE instead of unsigned long. */ - struct trace_kprobe *tk; + struct trace_kprobe *tk = NULL; int i, len, ret = 0; bool is_return = false; char *symbol = NULL, *tmp = NULL; @@ -615,44 +610,50 @@ static int trace_kprobe_create(int argc, const char *argv[]) if (argc < 2) return -ECANCELED; + trace_probe_log_init("trace_kprobe", argc, argv); + event = strchr(&argv[0][1], ':'); if (event) event++; if (isdigit(argv[0][1])) { if (!is_return) { - pr_info("Maxactive is not for kprobe"); - return -EINVAL; + trace_probe_log_err(1, MAXACT_NO_KPROBE); + goto parse_error; } if (event) len = event - &argv[0][1] - 1; else len = strlen(&argv[0][1]); - if (len > MAX_EVENT_NAME_LEN - 1) - return -E2BIG; + if (len > MAX_EVENT_NAME_LEN - 1) { + trace_probe_log_err(1, BAD_MAXACT); + goto parse_error; + } memcpy(buf, &argv[0][1], len); buf[len] = '\0'; ret = kstrtouint(buf, 0, &maxactive); if (ret || !maxactive) { - pr_info("Invalid maxactive number\n"); - return ret; + trace_probe_log_err(1, BAD_MAXACT); + goto parse_error; } /* kretprobes instances are iterated over via a list. The * maximum should stay reasonable. */ if (maxactive > KRETPROBE_MAXACTIVE_MAX) { - pr_info("Maxactive is too big (%d > %d).\n", - maxactive, KRETPROBE_MAXACTIVE_MAX); - return -E2BIG; + trace_probe_log_err(1, MAXACT_TOO_BIG); + goto parse_error; } } /* try to parse an address. if that fails, try to read the * input as a symbol. */ if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) { + trace_probe_log_set_index(1); /* Check whether uprobe event specified */ - if (strchr(argv[1], '/') && strchr(argv[1], ':')) - return -ECANCELED; + if (strchr(argv[1], '/') && strchr(argv[1], ':')) { + ret = -ECANCELED; + goto error; + } /* a symbol specified */ symbol = kstrdup(argv[1], GFP_KERNEL); if (!symbol) @@ -660,23 +661,23 @@ static int trace_kprobe_create(int argc, const char *argv[]) /* TODO: support .init module functions */ ret = traceprobe_split_symbol_offset(symbol, &offset); if (ret || offset < 0 || offset > UINT_MAX) { - pr_info("Failed to parse either an address or a symbol.\n"); - goto out; + trace_probe_log_err(0, BAD_PROBE_ADDR); + goto parse_error; } if (kprobe_on_func_entry(NULL, symbol, offset)) flags |= TPARG_FL_FENTRY; if (offset && is_return && !(flags & TPARG_FL_FENTRY)) { - pr_info("Given offset is not valid for return probe.\n"); - ret = -EINVAL; - goto out; + trace_probe_log_err(0, BAD_RETPROBE); + goto parse_error; } } - argc -= 2; argv += 2; + trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf); + ret = traceprobe_parse_event_name(&event, &group, buf, + event - argv[0]); if (ret) - goto out; + goto parse_error; } else { /* Make a new event name */ if (symbol) @@ -691,13 +692,14 @@ static int trace_kprobe_create(int argc, const char *argv[]) /* setup a probe */ tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, - argc, is_return); + argc - 2, is_return); if (IS_ERR(tk)) { ret = PTR_ERR(tk); - /* This must return -ENOMEM otherwise there is a bug */ + /* This must return -ENOMEM, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM); - goto out; + goto out; /* We know tk is not allocated */ } + argc -= 2; argv += 2; /* parse arguments */ for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { @@ -707,19 +709,32 @@ static int trace_kprobe_create(int argc, const char *argv[]) goto error; } + trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags); kfree(tmp); if (ret) - goto error; + goto error; /* This can be -ENOMEM */ } ret = register_trace_kprobe(tk); - if (ret) + if (ret) { + trace_probe_log_set_index(1); + if (ret == -EILSEQ) + trace_probe_log_err(0, BAD_INSN_BNDRY); + else if (ret == -ENOENT) + trace_probe_log_err(0, BAD_PROBE_ADDR); + else if (ret != -ENOMEM) + trace_probe_log_err(0, FAIL_REG_PROBE); goto error; + } + out: + trace_probe_log_clear(); kfree(symbol); return ret; +parse_error: + ret = -EINVAL; error: free_trace_kprobe(tk); goto out; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8f8411e7835f..e11f98c49d72 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -13,6 +13,11 @@ #include "trace_probe.h" +#undef C +#define C(a, b) b + +static const char *trace_probe_err_text[] = { ERRORS }; + static const char *reserved_field_names[] = { "common_type", "common_flags", @@ -133,6 +138,60 @@ fail: return NULL; } +static struct trace_probe_log trace_probe_log; + +void trace_probe_log_init(const char *subsystem, int argc, const char **argv) +{ + trace_probe_log.subsystem = subsystem; + trace_probe_log.argc = argc; + trace_probe_log.argv = argv; + trace_probe_log.index = 0; +} + +void trace_probe_log_clear(void) +{ + memset(&trace_probe_log, 0, sizeof(trace_probe_log)); +} + +void trace_probe_log_set_index(int index) +{ + trace_probe_log.index = index; +} + +void __trace_probe_log_err(int offset, int err_type) +{ + char *command, *p; + int i, len = 0, pos = 0; + + if (!trace_probe_log.argv) + return; + + /* Recalcurate the length and allocate buffer */ + for (i = 0; i < trace_probe_log.argc; i++) { + if (i == trace_probe_log.index) + pos = len; + len += strlen(trace_probe_log.argv[i]) + 1; + } + command = kzalloc(len, GFP_KERNEL); + if (!command) + return; + + /* And make a command string from argv array */ + p = command; + for (i = 0; i < trace_probe_log.argc; i++) { + len = strlen(trace_probe_log.argv[i]); + strcpy(p, trace_probe_log.argv[i]); + p[len] = ' '; + p += len + 1; + } + *(p - 1) = '\0'; + + tracing_log_err(trace_probe_log.subsystem, command, + trace_probe_err_text, err_type, pos + offset); + + kfree(command); +} + /* Split symbol and offset. */ int traceprobe_split_symbol_offset(char *symbol, long *offset) { @@ -156,7 +215,7 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset) /* @buf must has MAX_EVENT_NAME_LEN size */ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, - char *buf) + char *buf, int offset) { const char *slash, *event = *pevent; int len; @@ -164,32 +223,33 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, slash = strchr(event, '/'); if (slash) { if (slash == event) { - pr_info("Group name is not specified\n"); + trace_probe_log_err(offset, NO_GROUP_NAME); return -EINVAL; } if (slash - event + 1 > MAX_EVENT_NAME_LEN) { - pr_info("Group name is too long\n"); - return -E2BIG; + trace_probe_log_err(offset, GROUP_TOO_LONG); + return -EINVAL; } strlcpy(buf, event, slash - event + 1); if (!is_good_name(buf)) { - pr_info("Group name must follow the same rules as C identifiers\n"); + trace_probe_log_err(offset, BAD_GROUP_NAME); return -EINVAL; } *pgroup = buf; *pevent = slash + 1; + offset += slash - event + 1; event = *pevent; } len = strlen(event); if (len == 0) { - pr_info("Event name is not specified\n"); + trace_probe_log_err(offset, NO_EVENT_NAME); return -EINVAL; } else if (len > MAX_EVENT_NAME_LEN) { - pr_info("Event name is too long\n"); - return -E2BIG; + trace_probe_log_err(offset, EVENT_TOO_LONG); + return -EINVAL; } if (!is_good_name(event)) { - pr_info("Event name must follow the same rules as C identifiers\n"); + trace_probe_log_err(offset, BAD_EVENT_NAME); return -EINVAL; } return 0; @@ -198,56 +258,67 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_insn *code, unsigned int flags) + struct fetch_insn *code, unsigned int flags, int offs) { unsigned long param; int ret = 0; int len; if (strcmp(arg, "retval") == 0) { - if (flags & TPARG_FL_RETURN) + if (flags & TPARG_FL_RETURN) { code->op = FETCH_OP_RETVAL; - else + } else { + trace_probe_log_err(offs, RETVAL_ON_PROBE); ret = -EINVAL; + } } else if ((len = str_has_prefix(arg, "stack"))) { if (arg[len] == '\0') { code->op = FETCH_OP_STACKP; } else if (isdigit(arg[len])) { ret = kstrtoul(arg + len, 10, ¶m); - if (ret || ((flags & TPARG_FL_KERNEL) && - param > PARAM_MAX_STACK)) + if (ret) { + goto inval_var; + } else if ((flags & TPARG_FL_KERNEL) && + param > PARAM_MAX_STACK) { + trace_probe_log_err(offs, BAD_STACK_NUM); ret = -EINVAL; - else { + } else { code->op = FETCH_OP_STACK; code->param = (unsigned int)param; } } else - ret = -EINVAL; + goto inval_var; } else if (strcmp(arg, "comm") == 0) { code->op = FETCH_OP_COMM; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API } else if (((flags & TPARG_FL_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) && (len = str_has_prefix(arg, "arg"))) { - if (!isdigit(arg[len])) - return -EINVAL; ret = kstrtoul(arg + len, 10, ¶m); - if (ret || !param || param > PARAM_MAX_STACK) + if (ret) { + goto inval_var; + } else if (!param || param > PARAM_MAX_STACK) { + trace_probe_log_err(offs, BAD_ARG_NUM); return -EINVAL; + } code->op = FETCH_OP_ARG; code->param = (unsigned int)param - 1; #endif } else - ret = -EINVAL; + goto inval_var; return ret; + +inval_var: + trace_probe_log_err(offs, BAD_VAR); + return -EINVAL; } /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *type, struct fetch_insn **pcode, struct fetch_insn *end, - unsigned int flags) + unsigned int flags, int offs) { struct fetch_insn *code = *pcode; unsigned long param; @@ -257,7 +328,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, type, code, flags); + ret = parse_probe_vars(arg + 1, type, code, flags, offs); break; case '%': /* named register */ @@ -266,47 +337,57 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->op = FETCH_OP_REG; code->param = (unsigned int)ret; ret = 0; - } + } else + trace_probe_log_err(offs, BAD_REG_NAME); break; case '@': /* memory, file-offset or symbol */ if (isdigit(arg[1])) { ret = kstrtoul(arg + 1, 0, ¶m); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_MEM_ADDR); break; + } /* load address */ code->op = FETCH_OP_IMM; code->immediate = param; } else if (arg[1] == '+') { /* kprobes don't support file offsets */ - if (flags & TPARG_FL_KERNEL) + if (flags & TPARG_FL_KERNEL) { + trace_probe_log_err(offs, FILE_ON_KPROBE); return -EINVAL; - + } ret = kstrtol(arg + 2, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_FILE_OFFS); break; + } code->op = FETCH_OP_FOFFS; code->immediate = (unsigned long)offset; // imm64? } else { /* uprobes don't support symbols */ - if (!(flags & TPARG_FL_KERNEL)) + if (!(flags & TPARG_FL_KERNEL)) { + trace_probe_log_err(offs, SYM_ON_UPROBE); return -EINVAL; - + } /* Preserve symbol for updating */ code->op = FETCH_NOP_SYMBOL; code->data = kstrdup(arg + 1, GFP_KERNEL); if (!code->data) return -ENOMEM; - if (++code == end) - return -E2BIG; - + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } code->op = FETCH_OP_IMM; code->immediate = 0; } /* These are fetching from memory */ - if (++code == end) - return -E2BIG; + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } *pcode = code; code->op = FETCH_OP_DEREF; code->offset = offset; @@ -317,28 +398,38 @@ parse_probe_arg(char *arg, const struct fetch_type *type, /* fall through */ case '-': tmp = strchr(arg, '('); - if (!tmp) + if (!tmp) { + trace_probe_log_err(offs, DEREF_NEED_BRACE); return -EINVAL; - + } *tmp = '\0'; ret = kstrtol(arg, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_DEREF_OFFS); break; - + } + offs += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0); arg = tmp + 1; tmp = strrchr(arg, ')'); - - if (tmp) { + if (!tmp) { + trace_probe_log_err(offs + strlen(arg), + DEREF_OPEN_BRACE); + return -EINVAL; + } else { const struct fetch_type *t2 = find_fetch_type(NULL); *tmp = '\0'; - ret = parse_probe_arg(arg, t2, &code, end, flags); + ret = parse_probe_arg(arg, t2, &code, end, flags, offs); if (ret) break; - if (code->op == FETCH_OP_COMM) + if (code->op == FETCH_OP_COMM) { + trace_probe_log_err(offs, COMM_CANT_DEREF); return -EINVAL; - if (++code == end) - return -E2BIG; + } + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } *pcode = code; code->op = FETCH_OP_DEREF; @@ -348,6 +439,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, } if (!ret && code->op == FETCH_OP_NOP) { /* Parsed, but do not find fetch method */ + trace_probe_log_err(offs, BAD_FETCH_ARG); ret = -EINVAL; } return ret; @@ -379,7 +471,7 @@ static int __parse_bitfield_probe_arg(const char *bf, return -EINVAL; code++; if (code->op != FETCH_OP_NOP) - return -E2BIG; + return -EINVAL; *pcode = code; code->op = FETCH_OP_MOD_BF; @@ -392,32 +484,53 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, - struct probe_arg *parg, unsigned int flags) + struct probe_arg *parg, unsigned int flags, int offset) { struct fetch_insn *code, *scode, *tmp = NULL; - char *t, *t2; + char *t, *t2, *t3; int ret, len; - if (strlen(arg) > MAX_ARGSTR_LEN) { - pr_info("Argument is too long.: %s\n", arg); - return -ENOSPC; + len = strlen(arg); + if (len > MAX_ARGSTR_LEN) { + trace_probe_log_err(offset, ARG_TOO_LONG); + return -EINVAL; + } else if (len == 0) { + trace_probe_log_err(offset, NO_ARG_BODY); + return -EINVAL; } + parg->comm = kstrdup(arg, GFP_KERNEL); - if (!parg->comm) { - pr_info("Failed to allocate memory for command '%s'.\n", arg); + if (!parg->comm) return -ENOMEM; - } + t = strchr(arg, ':'); if (t) { *t = '\0'; t2 = strchr(++t, '['); if (t2) { - *t2 = '\0'; - parg->count = simple_strtoul(t2 + 1, &t2, 0); - if (strcmp(t2, "]") || parg->count == 0) + *t2++ = '\0'; + t3 = strchr(t2, ']'); + if (!t3) { + offset += t2 + strlen(t2) - arg; + trace_probe_log_err(offset, + ARRAY_NO_CLOSE); return -EINVAL; - if (parg->count > MAX_ARRAY_LEN) - return -E2BIG; + } else if (t3[1] != '\0') { + trace_probe_log_err(offset + t3 + 1 - arg, + BAD_ARRAY_SUFFIX); + return -EINVAL; + } + *t3 = '\0'; + if (kstrtouint(t2, 0, &parg->count) || !parg->count) { + trace_probe_log_err(offset + t2 - arg, + BAD_ARRAY_NUM); + return -EINVAL; + } + if (parg->count > MAX_ARRAY_LEN) { + trace_probe_log_err(offset + t2 - arg, + ARRAY_TOO_BIG); + return -EINVAL; + } } } /* @@ -429,7 +542,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, else parg->type = find_fetch_type(t); if (!parg->type) { - pr_info("Unsupported type: %s\n", t); + trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE); return -EINVAL; } parg->offset = *size; @@ -450,7 +563,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], - flags); + flags, offset); if (ret) goto fail; @@ -458,7 +571,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if (!strcmp(parg->type->name, "string")) { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { - pr_info("string only accepts memory or address.\n"); + trace_probe_log_err(offset + (t ? (t - arg) : 0), + BAD_STRING); ret = -EINVAL; goto fail; } @@ -470,7 +584,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, */ code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } } @@ -483,7 +598,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } else { code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } code->op = FETCH_OP_ST_RAW; @@ -493,20 +609,24 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, /* Modify operation */ if (t != NULL) { ret = __parse_bitfield_probe_arg(t, parg->type, &code); - if (ret) + if (ret) { + trace_probe_log_err(offset + t - arg, BAD_BITFIELD); goto fail; + } } /* Loop(Array) operation */ if (parg->count) { if (scode->op != FETCH_OP_ST_MEM && scode->op != FETCH_OP_ST_STRING) { - pr_info("array only accepts memory or address\n"); + trace_probe_log_err(offset + (t ? (t - arg) : 0), + BAD_STRING); ret = -EINVAL; goto fail; } code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } code->op = FETCH_OP_LP_ARRAY; @@ -555,15 +675,19 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg, { struct probe_arg *parg = &tp->args[i]; char *body; - int ret; /* Increment count for freeing args in error case */ tp->nr_args++; body = strchr(arg, '='); if (body) { - if (body - arg > MAX_ARG_NAME_LEN || body == arg) + if (body - arg > MAX_ARG_NAME_LEN) { + trace_probe_log_err(0, ARG_NAME_TOO_LONG); return -EINVAL; + } else if (body == arg) { + trace_probe_log_err(0, NO_ARG_NAME); + return -EINVAL; + } parg->name = kmemdup_nul(arg, body - arg, GFP_KERNEL); body++; } else { @@ -575,22 +699,16 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg, return -ENOMEM; if (!is_good_name(parg->name)) { - pr_info("Invalid argument[%d] name: %s\n", - i, parg->name); + trace_probe_log_err(0, BAD_ARG_NAME); return -EINVAL; } - if (traceprobe_conflict_field_name(parg->name, tp->args, i)) { - pr_info("Argument[%d]: '%s' conflicts with another field.\n", - i, parg->name); + trace_probe_log_err(0, USED_ARG_NAME); return -EINVAL; } - /* Parse fetch argument */ - ret = traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags); - if (ret) - pr_info("Parse error at argument[%d]. (%d)\n", i, ret); - return ret; + return traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags, + body - arg); } void traceprobe_free_probe_arg(struct probe_arg *arg) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2177c206de15..b7737666c1a8 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -280,8 +280,8 @@ extern int traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); extern int traceprobe_split_symbol_offset(char *symbol, long *offset); -extern int traceprobe_parse_event_name(const char **pevent, - const char **pgroup, char *buf); +int traceprobe_parse_event_name(const char **pevent, const char **pgroup, + char *buf, int offset); extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return); @@ -298,3 +298,76 @@ extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, size_t offset, struct trace_probe *tp); + +#undef ERRORS +#define ERRORS \ + C(FILE_NOT_FOUND, "Failed to find the given file"), \ + C(NO_REGULAR_FILE, "Not a regular file"), \ + C(BAD_REFCNT, "Invalid reference counter offset"), \ + C(REFCNT_OPEN_BRACE, "Reference counter brace is not closed"), \ + C(BAD_REFCNT_SUFFIX, "Reference counter has wrong suffix"), \ + C(BAD_UPROBE_OFFS, "Invalid uprobe offset"), \ + C(MAXACT_NO_KPROBE, "Maxactive is not for kprobe"), \ + C(BAD_MAXACT, "Invalid maxactive number"), \ + C(MAXACT_TOO_BIG, "Maxactive is too big"), \ + C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \ + C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ + C(NO_GROUP_NAME, "Group name is not specified"), \ + C(GROUP_TOO_LONG, "Group name is too long"), \ + C(BAD_GROUP_NAME, "Group name must follow the same rules as C identifiers"), \ + C(NO_EVENT_NAME, "Event name is not specified"), \ + C(EVENT_TOO_LONG, "Event name is too long"), \ + C(BAD_EVENT_NAME, "Event name must follow the same rules as C identifiers"), \ + C(RETVAL_ON_PROBE, "$retval is not available on probe"), \ + C(BAD_STACK_NUM, "Invalid stack number"), \ + C(BAD_ARG_NUM, "Invalid argument number"), \ + C(BAD_VAR, "Invalid $-valiable specified"), \ + C(BAD_REG_NAME, "Invalid register name"), \ + C(BAD_MEM_ADDR, "Invalid memory address"), \ + C(FILE_ON_KPROBE, "File offset is not available with kprobe"), \ + C(BAD_FILE_OFFS, "Invalid file offset value"), \ + C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ + C(TOO_MANY_OPS, "Dereference is too much nested"), \ + C(DEREF_NEED_BRACE, "Dereference needs a brace"), \ + C(BAD_DEREF_OFFS, "Invalid dereference offset"), \ + C(DEREF_OPEN_BRACE, "Dereference brace is not closed"), \ + C(COMM_CANT_DEREF, "$comm can not be dereferenced"), \ + C(BAD_FETCH_ARG, "Invalid fetch argument"), \ + C(ARRAY_NO_CLOSE, "Array is not closed"), \ + C(BAD_ARRAY_SUFFIX, "Array has wrong suffix"), \ + C(BAD_ARRAY_NUM, "Invalid array size"), \ + C(ARRAY_TOO_BIG, "Array number is too big"), \ + C(BAD_TYPE, "Unknown type is specified"), \ + C(BAD_STRING, "String accepts only memory argument"), \ + C(BAD_BITFIELD, "Invalid bitfield"), \ + C(ARG_NAME_TOO_LONG, "Argument name is too long"), \ + C(NO_ARG_NAME, "Argument name is not specified"), \ + C(BAD_ARG_NAME, "Argument name must follow the same rules as C identifiers"), \ + C(USED_ARG_NAME, "This argument name is already used"), \ + C(ARG_TOO_LONG, "Argument expression is too long"), \ + C(NO_ARG_BODY, "No argument expression"), \ + C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\ + C(FAIL_REG_PROBE, "Failed to register probe event"), + +#undef C +#define C(a, b) TP_ERR_##a + +/* Define TP_ERR_ */ +enum { ERRORS }; + +/* Error text is defined in trace_probe.c */ + +struct trace_probe_log { + const char *subsystem; + const char **argv; + int argc; + int index; +}; + +void trace_probe_log_init(const char *subsystem, int argc, const char **argv); +void trace_probe_log_set_index(int index); +void trace_probe_log_clear(void); +void __trace_probe_log_err(int offset, int err); + +#define trace_probe_log_err(offs, err) \ + __trace_probe_log_err(offs, TP_ERR_##err) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index be78d99ee6bc..cd8750a72768 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -457,13 +457,19 @@ static int trace_uprobe_create(int argc, const char **argv) return -ECANCELED; } + trace_probe_log_init("trace_uprobe", argc, argv); + trace_probe_log_set_index(1); /* filename is the 2nd argument */ + *arg++ = '\0'; ret = kern_path(filename, LOOKUP_FOLLOW, &path); if (ret) { + trace_probe_log_err(0, FILE_NOT_FOUND); kfree(filename); + trace_probe_log_clear(); return ret; } if (!d_is_reg(path.dentry)) { + trace_probe_log_err(0, NO_REGULAR_FILE); ret = -EINVAL; goto fail_address_parse; } @@ -472,9 +478,16 @@ static int trace_uprobe_create(int argc, const char **argv) rctr = strchr(arg, '('); if (rctr) { rctr_end = strchr(rctr, ')'); - if (rctr > rctr_end || *(rctr_end + 1) != 0) { + if (!rctr_end) { ret = -EINVAL; - pr_info("Invalid reference counter offset.\n"); + rctr_end = rctr + strlen(rctr); + trace_probe_log_err(rctr_end - filename, + REFCNT_OPEN_BRACE); + goto fail_address_parse; + } else if (rctr_end[1] != '\0') { + ret = -EINVAL; + trace_probe_log_err(rctr_end + 1 - filename, + BAD_REFCNT_SUFFIX); goto fail_address_parse; } @@ -482,22 +495,23 @@ static int trace_uprobe_create(int argc, const char **argv) *rctr_end = '\0'; ret = kstrtoul(rctr, 0, &ref_ctr_offset); if (ret) { - pr_info("Invalid reference counter offset.\n"); + trace_probe_log_err(rctr - filename, BAD_REFCNT); goto fail_address_parse; } } /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS); goto fail_address_parse; - - argc -= 2; - argv += 2; + } /* setup a probe */ + trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf); + ret = traceprobe_parse_event_name(&event, &group, buf, + event - argv[0]); if (ret) goto fail_address_parse; } else { @@ -519,6 +533,9 @@ static int trace_uprobe_create(int argc, const char **argv) kfree(tail); } + argc -= 2; + argv += 2; + tu = alloc_trace_uprobe(group, event, argc, is_return); if (IS_ERR(tu)) { ret = PTR_ERR(tu); @@ -539,6 +556,7 @@ static int trace_uprobe_create(int argc, const char **argv) goto error; } + trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tu->tp, i, tmp, is_return ? TPARG_FL_RETURN : 0); kfree(tmp); @@ -547,20 +565,20 @@ static int trace_uprobe_create(int argc, const char **argv) } ret = register_trace_uprobe(tu); - if (ret) - goto error; - return 0; + if (!ret) + goto out; error: free_trace_uprobe(tu); +out: + trace_probe_log_clear(); return ret; fail_address_parse: + trace_probe_log_clear(); path_put(&path); kfree(filename); - pr_info("Failed to parse address or file.\n"); - return ret; } From 1e144d73f7295f766568c357448a11eb12868e29 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 16:07:48 -0400 Subject: [PATCH 08/44] tracing: Add trace_array parameter to create_event_filter() Pass in the trace_array that represents the instance the filter being changed is in to create_event_filter(). This will allow for error messages that happen when writing to the filter can be displayed in the proper instance "error_log" file. Note, for calls to create_filter() (that was also modified to support create_event_filter()), that changes filters that do not exist in a instance (for perf for example), NULL may be passed in, which means that there will not be any message to log for that filter. Reviewed-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 3 ++- kernel/trace/trace_events_filter.c | 25 ++++++++++++++----------- kernel/trace/trace_events_trigger.c | 3 ++- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b711edbef7e7..809c5d7f0064 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1553,7 +1553,8 @@ extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); -extern int create_event_filter(struct trace_event_call *call, +extern int create_event_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 290d42c59101..2b63930cd3e6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -920,7 +920,8 @@ static void remove_filter_string(struct event_filter *filter) filter->filter_string = NULL; } -static void append_filter_err(struct filter_parse_error *pe, +static void append_filter_err(struct trace_array *tr, + struct filter_parse_error *pe, struct event_filter *filter) { struct trace_seq *s; @@ -1607,7 +1608,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, if (err) { filter_disable(file); parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); - append_filter_err(pe, filter); + append_filter_err(tr, pe, filter); } else event_set_filtered_flag(file); @@ -1719,7 +1720,8 @@ static void create_filter_finish(struct filter_parse_error *pe) * information if @set_str is %true and the caller is responsible for * freeing it. */ -static int create_filter(struct trace_event_call *call, +static int create_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_string, bool set_str, struct event_filter **filterp) { @@ -1736,17 +1738,18 @@ static int create_filter(struct trace_event_call *call, err = process_preds(call, filter_string, *filterp, pe); if (err && set_str) - append_filter_err(pe, *filterp); + append_filter_err(tr, pe, *filterp); create_filter_finish(pe); return err; } -int create_event_filter(struct trace_event_call *call, +int create_event_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp) { - return create_filter(call, filter_str, set_str, filterp); + return create_filter(tr, call, filter_str, set_str, filterp); } /** @@ -1773,7 +1776,7 @@ static int create_system_filter(struct trace_subsystem_dir *dir, kfree((*filterp)->filter_string); (*filterp)->filter_string = NULL; } else { - append_filter_err(pe, *filterp); + append_filter_err(tr, pe, *filterp); } } create_filter_finish(pe); @@ -1804,7 +1807,7 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string) return 0; } - err = create_filter(call, filter_string, true, &filter); + err = create_filter(file->tr, call, filter_string, true, &filter); /* * Always swap the call filter with the new filter @@ -2060,7 +2063,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, if (event->filter) goto out_unlock; - err = create_filter(call, filter_str, false, &filter); + err = create_filter(NULL, call, filter_str, false, &filter); if (err) goto free_filter; @@ -2209,8 +2212,8 @@ static __init int ftrace_test_event_filter(void) struct test_filter_data_t *d = &test_filter_data[i]; int err; - err = create_filter(&event_ftrace_test_filter, d->filter, - false, &filter); + err = create_filter(NULL, &event_ftrace_test_filter, + d->filter, false, &filter); if (err) { printk(KERN_INFO "Failed to get filter for '%s', err %d\n", diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index cd12ecb66eb9..2a2912cb4533 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -731,7 +731,8 @@ int set_trigger_filter(char *filter_str, goto out; /* The filter is for the 'trigger' event, not the triggered event */ - ret = create_event_filter(file->event_call, filter_str, false, &filter); + ret = create_event_filter(file->tr, file->event_call, + filter_str, false, &filter); /* * If create_event_filter() fails, filter still needs to be freed. * Which the calling code will do with data->filter. From d0cd871ba0d613e09366e4b6a17946dfcf51db7c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 22:30:22 -0400 Subject: [PATCH 09/44] tracing: Have histogram code pass around trace_array for error handling Have the trace_array that associates the trace instance of the histogram passed around to functions so that error handling can display the error message in the proper instance. Reviewed-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 142 +++++++++++++++++-------------- 1 file changed, 80 insertions(+), 62 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 071c62cacba7..a167e439e9a1 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -619,7 +619,7 @@ static void last_cmd_set(struct trace_event_file *file, char *str) snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name); } -static void hist_err(u8 err_type, u8 err_pos) +static void hist_err(struct trace_array *tr, u8 err_type, u8 err_pos) { tracing_log_err(last_cmd_loc, last_cmd, err_text, err_type, err_pos); } @@ -1756,7 +1756,7 @@ static struct trace_event_file *find_var_file(struct trace_array *tr, if (find_var_field(var_hist_data, var_name)) { if (found) { - hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); + hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); return NULL; } @@ -1807,7 +1807,8 @@ find_match_var(struct hist_trigger_data *hist_data, char *var_name) hist_field = find_file_var(file, var_name); if (hist_field) { if (found) { - hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); + hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE, + errpos(var_name)); return ERR_PTR(-EINVAL); } @@ -2042,7 +2043,8 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) return ret; } -static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) +static int parse_assignment(struct trace_array *tr, + char *str, struct hist_trigger_attrs *attrs) { int ret = 0; @@ -2098,7 +2100,7 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) char *assignment; if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { - hist_err(HIST_ERR_TOO_MANY_VARS, errpos(str)); + hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(str)); ret = -EINVAL; goto out; } @@ -2115,7 +2117,8 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) return ret; } -static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) +static struct hist_trigger_attrs * +parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) { struct hist_trigger_attrs *attrs; int ret = 0; @@ -2128,7 +2131,7 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) char *str = strsep(&trigger_str, ":"); if (strchr(str, '=')) { - ret = parse_assignment(str, attrs); + ret = parse_assignment(tr, str, attrs); if (ret) goto free; } else if (strcmp(str, "pause") == 0) @@ -2684,6 +2687,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, char *var_name) { struct hist_field *var_field = NULL, *ref_field = NULL; + struct trace_array *tr = hist_data->event_file->tr; if (!is_var_ref(var_name)) return NULL; @@ -2696,7 +2700,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, system, event_name); if (!ref_field) - hist_err(HIST_ERR_VAR_NOT_FOUND, errpos(var_name)); + hist_err(tr, HIST_ERR_VAR_NOT_FOUND, errpos(var_name)); return ref_field; } @@ -2707,6 +2711,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, { struct ftrace_event_field *field = NULL; char *field_name, *modifier, *str; + struct trace_array *tr = file->tr; modifier = str = kstrdup(field_str, GFP_KERNEL); if (!modifier) @@ -2730,7 +2735,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else if (strcmp(modifier, "usecs") == 0) *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; else { - hist_err(HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); + hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); field = ERR_PTR(-EINVAL); goto out; } @@ -2746,7 +2751,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err(HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); field = ERR_PTR(-EINVAL); goto out; } @@ -2808,7 +2813,8 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var); if (!s) { - hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var); + hist_field = parse_var_ref(hist_data, ref_system, + ref_event, ref_var); if (hist_field) { if (var_name) { hist_field = create_alias(hist_data, hist_field, var_name); @@ -2857,7 +2863,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, /* we support only -(xxx) i.e. explicit parens required */ if (level > 3) { - hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); + hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; } @@ -2912,7 +2918,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, return ERR_PTR(ret); } -static int check_expr_operands(struct hist_field *operand1, +static int check_expr_operands(struct trace_array *tr, + struct hist_field *operand1, struct hist_field *operand2) { unsigned long operand1_flags = operand1->flags; @@ -2940,7 +2947,7 @@ static int check_expr_operands(struct hist_field *operand1, if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { - hist_err(HIST_ERR_TIMESTAMP_MISMATCH, 0); + hist_err(tr, HIST_ERR_TIMESTAMP_MISMATCH, 0); return -EINVAL; } @@ -2958,7 +2965,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, char *sep, *operand1_str; if (level > 3) { - hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); + hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } @@ -3003,7 +3010,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, goto free; } - ret = check_expr_operands(operand1, operand2); + ret = check_expr_operands(file->tr, operand1, operand2); if (ret) goto free; @@ -3196,14 +3203,14 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, int ret; if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { - hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); + hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); return ERR_PTR(-EINVAL); } file = event_file(tr, subsys_name, event_name); if (IS_ERR(file)) { - hist_err(HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name)); ret = PTR_ERR(file); return ERR_PTR(ret); } @@ -3216,7 +3223,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, */ hist_data = find_compatible_hist(target_hist_data, file); if (!hist_data) { - hist_err(HIST_ERR_HIST_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_HIST_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3277,7 +3284,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, kfree(cmd); kfree(var_hist->cmd); kfree(var_hist); - hist_err(HIST_ERR_HIST_CREATE_FAIL, errpos(field_name)); + hist_err(tr, HIST_ERR_HIST_CREATE_FAIL, errpos(field_name)); return ERR_PTR(ret); } @@ -3289,7 +3296,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, if (IS_ERR_OR_NULL(event_var)) { kfree(var_hist->cmd); kfree(var_hist); - hist_err(HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3422,25 +3429,26 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, { struct hist_field *val = NULL, *var = NULL; unsigned long flags = HIST_FIELD_FL_VAR; + struct trace_array *tr = file->tr; struct field_var *field_var; int ret = 0; if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { - hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); + hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); ret = -EINVAL; goto err; } val = parse_atom(hist_data, file, field_name, &flags, NULL); if (IS_ERR(val)) { - hist_err(HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name)); + hist_err(tr, HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name)); ret = PTR_ERR(val); goto err; } var = create_var(hist_data, file, field_name, val->size, val->type); if (IS_ERR(var)) { - hist_err(HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); + hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); kfree(val); ret = PTR_ERR(var); goto err; @@ -3767,19 +3775,20 @@ static int track_data_create(struct hist_trigger_data *hist_data, { struct hist_field *var_field, *ref_field, *track_var = NULL; struct trace_event_file *file = hist_data->event_file; + struct trace_array *tr = file->tr; char *track_data_var_str; int ret = 0; track_data_var_str = data->track_data.var_str; if (track_data_var_str[0] != '$') { - hist_err(HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str)); + hist_err(tr, HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str)); return -EINVAL; } track_data_var_str++; var_field = find_target_event_var(hist_data, NULL, NULL, track_data_var_str); if (!var_field) { - hist_err(HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str)); + hist_err(tr, HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str)); return -EINVAL; } @@ -3792,7 +3801,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONMAX) track_var = create_var(hist_data, file, "__max", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); + hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3800,7 +3809,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONCHANGE) track_var = create_var(hist_data, file, "__change", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); + hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3811,7 +3820,8 @@ static int track_data_create(struct hist_trigger_data *hist_data, return ret; } -static int parse_action_params(char *params, struct action_data *data) +static int parse_action_params(struct trace_array *tr, char *params, + struct action_data *data) { char *param, *saved_param; bool first_param = true; @@ -3819,20 +3829,20 @@ static int parse_action_params(char *params, struct action_data *data) while (params) { if (data->n_params >= SYNTH_FIELDS_MAX) { - hist_err(HIST_ERR_TOO_MANY_PARAMS, 0); + hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0); goto out; } param = strsep(¶ms, ","); if (!param) { - hist_err(HIST_ERR_PARAM_NOT_FOUND, 0); + hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, 0); ret = -EINVAL; goto out; } param = strstrip(param); if (strlen(param) < 2) { - hist_err(HIST_ERR_INVALID_PARAM, errpos(param)); + hist_err(tr, HIST_ERR_INVALID_PARAM, errpos(param)); ret = -EINVAL; goto out; } @@ -3856,7 +3866,7 @@ static int parse_action_params(char *params, struct action_data *data) return ret; } -static int action_parse(char *str, struct action_data *data, +static int action_parse(struct trace_array *tr, char *str, struct action_data *data, enum handler_id handler) { char *action_name; @@ -3864,14 +3874,14 @@ static int action_parse(char *str, struct action_data *data, strsep(&str, "."); if (!str) { - hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); + hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } action_name = strsep(&str, "("); if (!action_name || !str) { - hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); + hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } @@ -3880,12 +3890,12 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!params) { - hist_err(HIST_ERR_NO_SAVE_PARAMS, 0); + hist_err(tr, HIST_ERR_NO_SAVE_PARAMS, 0); ret = -EINVAL; goto out; } - ret = parse_action_params(params, data); + ret = parse_action_params(tr, params, data); if (ret) goto out; @@ -3894,7 +3904,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); + hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3906,7 +3916,7 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!str) { - hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(params)); + hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(params)); ret = -EINVAL; goto out; } @@ -3916,7 +3926,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); + hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3931,7 +3941,7 @@ static int action_parse(char *str, struct action_data *data, data->use_trace_keyword = true; if (params) { - ret = parse_action_params(params, data); + ret = parse_action_params(tr, params, data); if (ret) goto out; } @@ -3984,7 +3994,7 @@ static struct action_data *track_data_parse(struct hist_trigger_data *hist_data, goto free; } - ret = action_parse(str, data, handler); + ret = action_parse(hist_data->event_file->tr, str, data, handler); if (ret) goto free; out: @@ -4054,6 +4064,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, struct action_data *data, char *system, char *event, char *var) { + struct trace_array *tr = hist_data->event_file->tr; struct hist_field *hist_field; var++; /* skip '$' */ @@ -4069,7 +4080,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, } if (!hist_field) - hist_err(HIST_ERR_PARAM_NOT_FOUND, errpos(var)); + hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, errpos(var)); return hist_field; } @@ -4127,6 +4138,7 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data, static int trace_action_create(struct hist_trigger_data *hist_data, struct action_data *data) { + struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, *system = NULL; struct hist_field *hist_field, *var_ref; unsigned int i, var_ref_idx; @@ -4144,7 +4156,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event = find_synth_event(synth_event_name); if (!event) { - hist_err(HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name)); + hist_err(tr, HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name)); return -EINVAL; } @@ -4205,14 +4217,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, continue; } - hist_err(HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param)); + hist_err(tr, HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param)); kfree(p); ret = -EINVAL; goto err; } if (field_pos != event->n_fields) { - hist_err(HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name)); + hist_err(tr, HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name)); ret = -EINVAL; goto err; } @@ -4231,6 +4243,7 @@ static int action_create(struct hist_trigger_data *hist_data, struct action_data *data) { struct trace_event_file *file = hist_data->event_file; + struct trace_array *tr = file->tr; struct track_data *track_data; struct field_var *field_var; unsigned int i; @@ -4258,7 +4271,7 @@ static int action_create(struct hist_trigger_data *hist_data, if (data->action == ACTION_SAVE) { if (hist_data->n_save_vars) { ret = -EEXIST; - hist_err(HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0); + hist_err(tr, HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0); goto out; } @@ -4271,7 +4284,8 @@ static int action_create(struct hist_trigger_data *hist_data, field_var = create_target_field_var(hist_data, NULL, NULL, param); if (IS_ERR(field_var)) { - hist_err(HIST_ERR_FIELD_VAR_CREATE_FAIL, errpos(param)); + hist_err(tr, HIST_ERR_FIELD_VAR_CREATE_FAIL, + errpos(param)); ret = PTR_ERR(field_var); kfree(param); goto out; @@ -4305,18 +4319,18 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) match_event = strsep(&str, ")"); if (!match_event || !str) { - hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(match_event)); + hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(match_event)); goto free; } match_event_system = strsep(&match_event, "."); if (!match_event) { - hist_err(HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system)); + hist_err(tr, HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system)); goto free; } if (IS_ERR(event_file(tr, match_event_system, match_event))) { - hist_err(HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event)); + hist_err(tr, HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event)); goto free; } @@ -4332,7 +4346,7 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) goto free; } - ret = action_parse(str, data, HANDLER_ONMATCH); + ret = action_parse(tr, str, data, HANDLER_ONMATCH); if (ret) goto free; out: @@ -4401,13 +4415,14 @@ static int create_var_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *var_name, char *expr_str) { + struct trace_array *tr = hist_data->event_file->tr; unsigned long flags = 0; if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) return -EINVAL; if (find_var(hist_data, file, var_name) && !hist_data->remove) { - hist_err(HIST_ERR_DUPLICATE_VAR, errpos(var_name)); + hist_err(tr, HIST_ERR_DUPLICATE_VAR, errpos(var_name)); return -EINVAL; } @@ -4464,8 +4479,8 @@ static int create_key_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *field_str) { + struct trace_array *tr = hist_data->event_file->tr; struct hist_field *hist_field = NULL; - unsigned long flags = 0; unsigned int key_size; int ret = 0; @@ -4488,7 +4503,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { - hist_err(HIST_ERR_INVALID_REF_KEY, errpos(field_str)); + hist_err(tr, HIST_ERR_INVALID_REF_KEY, errpos(field_str)); destroy_hist_field(hist_field, 0); ret = -EINVAL; goto out; @@ -4589,6 +4604,7 @@ static void free_var_defs(struct hist_trigger_data *hist_data) static int parse_var_defs(struct hist_trigger_data *hist_data) { + struct trace_array *tr = hist_data->event_file->tr; char *s, *str, *var_name, *field_str; unsigned int i, j, n_vars = 0; int ret = 0; @@ -4602,13 +4618,14 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) var_name = strsep(&field_str, "="); if (!var_name || !field_str) { - hist_err(HIST_ERR_MALFORMED_ASSIGNMENT, errpos(var_name)); + hist_err(tr, HIST_ERR_MALFORMED_ASSIGNMENT, + errpos(var_name)); ret = -EINVAL; goto free; } if (n_vars == TRACING_MAP_VARS_MAX) { - hist_err(HIST_ERR_TOO_MANY_VARS, errpos(var_name)); + hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(var_name)); ret = -EINVAL; goto free; } @@ -5829,6 +5846,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, { struct hist_trigger_data *hist_data = data->private_data; struct event_trigger_data *test, *named_data = NULL; + struct trace_array *tr = file->tr; int ret = 0; if (hist_data->attrs->name) { @@ -5836,7 +5854,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (named_data) { if (!hist_trigger_match(data, named_data, named_data, true)) { - hist_err(HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name)); + hist_err(tr, HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name)); ret = -EINVAL; goto out; } @@ -5857,7 +5875,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, else if (hist_data->attrs->clear) hist_clear(test); else { - hist_err(HIST_ERR_TRIGGER_EEXIST, 0); + hist_err(tr, HIST_ERR_TRIGGER_EEXIST, 0); ret = -EEXIST; } goto out; @@ -5865,7 +5883,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, } new: if (hist_data->attrs->cont || hist_data->attrs->clear) { - hist_err(HIST_ERR_TRIGGER_ENOENT_CLEAR, 0); + hist_err(tr, HIST_ERR_TRIGGER_ENOENT_CLEAR, 0); ret = -ENOENT; goto out; } @@ -5890,7 +5908,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, ret = tracing_set_clock(file->tr, hist_data->attrs->clock); if (ret) { - hist_err(HIST_ERR_SET_CLOCK_FAIL, errpos(clock)); + hist_err(tr, HIST_ERR_SET_CLOCK_FAIL, errpos(clock)); goto out; } @@ -6108,7 +6126,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, trigger = strstrip(trigger); } - attrs = parse_hist_trigger_attrs(trigger); + attrs = parse_hist_trigger_attrs(file->tr, trigger); if (IS_ERR(attrs)) return PTR_ERR(attrs); From 2f754e771b1a6feba670782e82c45555984ac43b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 22:52:21 -0400 Subject: [PATCH 10/44] tracing: Have the error logs show up in the proper instances As each instance has their own error_log file, it makes more sense that the instances show the errors of their own instead of all error_logs having the same data. Make it that the errors show up in the instance error_log file that the error happens in. If no instance trace_array is available, then NULL can be passed in which will create the error in the top level instance (the one at the top of the tracefs directory). Reviewed-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 55 ++++++++++++++++++++---------- kernel/trace/trace.h | 5 ++- kernel/trace/trace_events_filter.c | 4 +-- kernel/trace/trace_events_hist.c | 3 +- kernel/trace/trace_probe.c | 2 +- 5 files changed, 46 insertions(+), 23 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7978168f5041..3d55e9daae8c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6897,25 +6897,22 @@ struct tracing_log_err { char cmd[MAX_FILTER_STR_VAL]; /* what caused err */ }; -static LIST_HEAD(tracing_err_log); static DEFINE_MUTEX(tracing_err_log_lock); -static unsigned int n_tracing_err_log_entries; - -struct tracing_log_err *get_tracing_log_err(void) +struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) { struct tracing_log_err *err; - if (n_tracing_err_log_entries < TRACING_LOG_ERRS_MAX) { + if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); - n_tracing_err_log_entries++; + tr->n_err_log_entries++; return err; } - err = list_first_entry(&tracing_err_log, struct tracing_log_err, list); + err = list_first_entry(&tr->err_log, struct tracing_log_err, list); list_del(&err->list); return err; @@ -6949,6 +6946,7 @@ unsigned int err_pos(char *cmd, const char *str) /** * tracing_log_err - write an error to the tracing error log + * @tr: The associated trace array for the error (NULL for top level array) * @loc: A string describing where the error occurred * @cmd: The tracing command that caused the error * @errs: The array of loc-specific static error strings @@ -6973,13 +6971,17 @@ unsigned int err_pos(char *cmd, const char *str) * existing callers for examples of how static strings are typically * defined for use with tracing_log_err(). */ -void tracing_log_err(const char *loc, const char *cmd, +void tracing_log_err(struct trace_array *tr, + const char *loc, const char *cmd, const char **errs, u8 type, u8 pos) { struct tracing_log_err *err; + if (!tr) + tr = &global_trace; + mutex_lock(&tracing_err_log_lock); - err = get_tracing_log_err(); + err = get_tracing_log_err(tr); if (PTR_ERR(err) == -ENOMEM) { mutex_unlock(&tracing_err_log_lock); return; @@ -6993,34 +6995,38 @@ void tracing_log_err(const char *loc, const char *cmd, err->info.pos = pos; err->info.ts = local_clock(); - list_add_tail(&err->list, &tracing_err_log); + list_add_tail(&err->list, &tr->err_log); mutex_unlock(&tracing_err_log_lock); } -static void clear_tracing_err_log(void) +static void clear_tracing_err_log(struct trace_array *tr) { struct tracing_log_err *err, *next; mutex_lock(&tracing_err_log_lock); - list_for_each_entry_safe(err, next, &tracing_err_log, list) { + list_for_each_entry_safe(err, next, &tr->err_log, list) { list_del(&err->list); kfree(err); } - n_tracing_err_log_entries = 0; + tr->n_err_log_entries = 0; mutex_unlock(&tracing_err_log_lock); } static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) { + struct trace_array *tr = m->private; + mutex_lock(&tracing_err_log_lock); - return seq_list_start(&tracing_err_log, *pos); + return seq_list_start(&tr->err_log, *pos); } static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &tracing_err_log, pos); + struct trace_array *tr = m->private; + + return seq_list_next(v, &tr->err_log, pos); } static void tracing_err_log_seq_stop(struct seq_file *m, void *v) @@ -7067,15 +7073,25 @@ static const struct seq_operations tracing_err_log_seq_ops = { static int tracing_err_log_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was opened for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - clear_tracing_err_log(); + clear_tracing_err_log(tr); - if (file->f_mode & FMODE_READ) + if (file->f_mode & FMODE_READ) { ret = seq_open(file, &tracing_err_log_seq_ops); - + if (!ret) { + struct seq_file *m = file->private_data; + m->private = tr; + } else { + trace_array_put(tr); + } + } return ret; } @@ -7091,6 +7107,7 @@ static const struct file_operations tracing_err_log_fops = { .write = tracing_err_log_write, .read = seq_read, .llseek = seq_lseek, + .release = tracing_release_generic_tr, }; static int tracing_buffers_open(struct inode *inode, struct file *filp) @@ -8293,6 +8310,7 @@ struct trace_array *trace_array_create(const char *name) INIT_LIST_HEAD(&tr->systems); INIT_LIST_HEAD(&tr->events); INIT_LIST_HEAD(&tr->hist_vars); + INIT_LIST_HEAD(&tr->err_log); if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; @@ -9087,6 +9105,7 @@ __init static int tracer_alloc_buffers(void) INIT_LIST_HEAD(&global_trace.systems); INIT_LIST_HEAD(&global_trace.events); INIT_LIST_HEAD(&global_trace.hist_vars); + INIT_LIST_HEAD(&global_trace.err_log); list_add(&global_trace.list, &ftrace_trace_arrays); apply_trace_boot_options(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 809c5d7f0064..da00a3d508c1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -293,11 +293,13 @@ struct trace_array { int nr_topts; bool clear_trace; int buffer_percent; + unsigned int n_err_log_entries; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; + struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; @@ -1886,7 +1888,8 @@ extern ssize_t trace_parse_run_command(struct file *file, int (*createfn)(int, char**)); extern unsigned int err_pos(char *cmd, const char *str); -extern void tracing_log_err(const char *loc, const char *cmd, +extern void tracing_log_err(struct trace_array *tr, + const char *loc, const char *cmd, const char **errs, u8 type, u8 pos); /* diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2b63930cd3e6..180ecb390baa 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -949,12 +949,12 @@ static void append_filter_err(struct trace_array *tr, if (pe->lasterr > 0) { trace_seq_printf(s, "\n%*s", pos, "^"); trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); - tracing_log_err("event filter parse error", + tracing_log_err(tr, "event filter parse error", filter->filter_string, err_text, pe->lasterr, pe->lasterr_pos); } else { trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); - tracing_log_err("event filter parse error", + tracing_log_err(tr, "event filter parse error", filter->filter_string, err_text, FILT_ERR_ERRNO, 0); } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index a167e439e9a1..a1136e043f17 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -621,7 +621,8 @@ static void last_cmd_set(struct trace_event_file *file, char *str) static void hist_err(struct trace_array *tr, u8 err_type, u8 err_pos) { - tracing_log_err(last_cmd_loc, last_cmd, err_text, err_type, err_pos); + tracing_log_err(tr, last_cmd_loc, last_cmd, err_text, + err_type, err_pos); } static void hist_err_clear(void) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e11f98c49d72..4cc2d467d34c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -186,7 +186,7 @@ void __trace_probe_log_err(int offset, int err_type) } *(p - 1) = '\0'; - tracing_log_err(trace_probe_log.subsystem, command, + tracing_log_err(NULL, trace_probe_log.subsystem, command, trace_probe_err_text, err_type, pos + offset); kfree(command); From 8ab4483eb660b37251847e1e2a1f787b8d568e81 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 31 Mar 2019 18:48:20 -0500 Subject: [PATCH 11/44] selftests/ftrace: Add error_log testcase for probe errors Add error_log testcase for error logs on probe events. This tests most of error cases and checks the error position is correct. Link: http://lkml.kernel.org/r/63d695b74e0965988fa54ffa12beeb2c3475250d.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Namhyung Kim Signed-off-by: Masami Hiramatsu [tom.zanussi@linux.intel.com: changed >& redirection to 2>] Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- .../test.d/kprobe/kprobe_syntax_errors.tc | 93 +++++++++++++++++++ .../test.d/kprobe/uprobe_syntax_errors.tc | 31 +++++++ 2 files changed, 124 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc new file mode 100644 index 000000000000..7eb577b1d222 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -0,0 +1,93 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event parser error log check + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +[ -f error_log ] || exit_unsupported + +check_error() { # command-with-error-pos-by-^ +pos=$(echo -n "${1%^*}" | wc -c) # error position +command=$(echo "$1" | tr -d ^) +echo "Test command: $command" +echo > error_log +(! echo "$command" > kprobe_events ) 2> /dev/null +grep "trace_kprobe: error:" -A 3 error_log +N=$(tail -n 1 error_log | wc -c) +# " Command: " and "^\n" => 13 +test $(expr 13 + $pos) -eq $N +} + +if grep -q 'r\[maxactive\]' README; then +check_error 'p^100 vfs_read' # MAXACT_NO_KPROBE +check_error 'r^1a111 vfs_read' # BAD_MAXACT +check_error 'r^100000 vfs_read' # MAXACT_TOO_BIG +fi + +check_error 'p ^non_exist_func' # BAD_PROBE_ADDR (enoent) +check_error 'p ^hoge-fuga' # BAD_PROBE_ADDR (bad syntax) +check_error 'p ^hoge+1000-1000' # BAD_PROBE_ADDR (bad syntax) +check_error 'r ^vfs_read+10' # BAD_RETPROBE +check_error 'p:^/bar vfs_read' # NO_GROUP_NAME +check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG + +check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME +check_error 'p:foo/^ vfs_read' # NO_EVENT_NAME +check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG +check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME + +check_error 'p vfs_read ^$retval' # RETVAL_ON_PROBE +check_error 'p vfs_read ^$stack10000' # BAD_STACK_NUM + +if grep -q '$arg' README; then +check_error 'p vfs_read ^$arg10000' # BAD_ARG_NUM +fi + +check_error 'p vfs_read ^$none_var' # BAD_VAR + +check_error 'p vfs_read ^%none_reg' # BAD_REG_NAME +check_error 'p vfs_read ^@12345678abcde' # BAD_MEM_ADDR +check_error 'p vfs_read ^@+10' # FILE_ON_KPROBE + +check_error 'p vfs_read ^+0@0)' # DEREF_NEED_BRACE +check_error 'p vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS +check_error 'p vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE + +if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then +check_error 'p vfs_read +0(^$comm)' # COMM_CANT_DEREF +fi + +check_error 'p vfs_read ^&1' # BAD_FETCH_ARG + + +# We've introduced this limitation with array support +if grep -q ' \\\[\\\]' README; then +check_error 'p vfs_read +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS? +check_error 'p vfs_read +0(@11):u8[10^' # ARRAY_NO_CLOSE +check_error 'p vfs_read +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX +check_error 'p vfs_read +0(@11):u8[^10a]' # BAD_ARRAY_NUM +check_error 'p vfs_read +0(@11):u8[^256]' # ARRAY_TOO_BIG +fi + +check_error 'p vfs_read @11:^unknown_type' # BAD_TYPE +check_error 'p vfs_read $stack0:^string' # BAD_STRING +check_error 'p vfs_read @11:^b10@a/16' # BAD_BITFIELD + +check_error 'p vfs_read ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LOG +check_error 'p vfs_read ^=@11' # NO_ARG_NAME +check_error 'p vfs_read ^var.1=@11' # BAD_ARG_NAME +check_error 'p vfs_read var1=@11 ^var1=@12' # USED_ARG_NAME +check_error 'p vfs_read ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG +check_error 'p vfs_read arg1=^' # NO_ARG_BODY + +# instruction boundary check is valid on x86 (at this moment) +case $(uname -m) in + x86_64|i[3456]86) + echo 'p vfs_read' > kprobe_events + if grep -q FTRACE ../kprobes/list ; then + check_error 'p ^vfs_read+3' # BAD_INSN_BNDRY (only if function-tracer is enabled) + fi + ;; +esac + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc new file mode 100644 index 000000000000..ec7389b7934b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Uprobe event parser error log check + +[ -f uprobe_events ] || exit_unsupported # this is configurable + +[ -f error_log ] || exit_unsupported + +check_error() { # command-with-error-pos-by-^ +pos=$(echo -n "${1%^*}" | wc -c) # error position +command=$(echo "$1" | tr -d ^) +echo "Test command: $command" +echo > error_log +(! echo "$command" > uprobe_events ) 2> /dev/null +grep "trace_uprobe: error:" -A 3 error_log +N=$(tail -n 1 error_log | wc -c) +# " Command: " and "^\n" => 13 +test $(expr 13 + $pos) -eq $N +} + +check_error 'p ^/non_exist_file:100' # FILE_NOT_FOUND +check_error 'p ^/sys:100' # NO_REGULAR_FILE +check_error 'p /bin/sh:^10a' # BAD_UPROBE_OFFS +check_error 'p /bin/sh:10(^1a)' # BAD_REFCNT +check_error 'p /bin/sh:10(10^' # REFCNT_OPEN_BRACE +check_error 'p /bin/sh:10(10)^a' # BAD_REFCNT_SUFFIX + +check_error 'p /bin/sh:10 ^@+ab' # BAD_FILE_OFFS +check_error 'p /bin/sh:10 ^@symbol' # SYM_ON_UPROBE + +exit 0 From c5e4114fee0b6582b6e86804ffef95bf82bb5f78 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:21 -0500 Subject: [PATCH 12/44] selftests/ftrace: Move kprobe/uprobe check_error() to test.d/functions The k/uprobe_sytax_errors test case defines a check_error() function used to run a command and check the position of the caret in the output. This would be useful for other ftrace facilities too, so move it to test.d/functions for use by anyone. In the process, rename it to ftrace_errlog_check() and parametrize it for general use. Link: http://lkml.kernel.org/r/9f88080a06f1755811f69081926afe7e5cb53178.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/test.d/functions | 12 ++++++++++++ .../ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 10 +--------- .../ftrace/test.d/kprobe/uprobe_syntax_errors.tc | 10 +--------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 7b96e80e6b8a..779ec11f61bd 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -109,3 +109,15 @@ LOCALHOST=127.0.0.1 yield() { ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1 } + +ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file + pos=$(echo -n "${2%^*}" | wc -c) # error position + command=$(echo "$2" | tr -d ^) + echo "Test command: $command" + echo > error_log + (! echo "$command" > "$3" ) 2> /dev/null + grep "$1: error:" -A 3 error_log + N=$(tail -n 1 error_log | wc -c) + # " Command: " and "^\n" => 13 + test $(expr 13 + $pos) -eq $N +} diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 7eb577b1d222..29faaec942c6 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -7,15 +7,7 @@ [ -f error_log ] || exit_unsupported check_error() { # command-with-error-pos-by-^ -pos=$(echo -n "${1%^*}" | wc -c) # error position -command=$(echo "$1" | tr -d ^) -echo "Test command: $command" -echo > error_log -(! echo "$command" > kprobe_events ) 2> /dev/null -grep "trace_kprobe: error:" -A 3 error_log -N=$(tail -n 1 error_log | wc -c) -# " Command: " and "^\n" => 13 -test $(expr 13 + $pos) -eq $N + ftrace_errlog_check 'trace_kprobe' "$1" 'kprobe_events' } if grep -q 'r\[maxactive\]' README; then diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc index ec7389b7934b..14229d5778a0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc @@ -7,15 +7,7 @@ [ -f error_log ] || exit_unsupported check_error() { # command-with-error-pos-by-^ -pos=$(echo -n "${1%^*}" | wc -c) # error position -command=$(echo "$1" | tr -d ^) -echo "Test command: $command" -echo > error_log -(! echo "$command" > uprobe_events ) 2> /dev/null -grep "trace_uprobe: error:" -A 3 error_log -N=$(tail -n 1 error_log | wc -c) -# " Command: " and "^\n" => 13 -test $(expr 13 + $pos) -eq $N + ftrace_errlog_check 'trace_uprobe' "$1" 'uprobe_events' } check_error 'p ^/non_exist_file:100' # FILE_NOT_FOUND From 0ae8dde9d7b62b40fb16b5d3feef467604b9a771 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:22 -0500 Subject: [PATCH 13/44] selftests/ftrace: Remove trigger-extended-error-support testcase Error handling has been moved to the common tracing/error_log, so this test is no longer valid. Link: http://lkml.kernel.org/r/876a98b21018814cbf46f0a3605ae0906c51d53c.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- .../trigger-extended-error-support.tc | 28 ------------------- 1 file changed, 28 deletions(-) delete mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc deleted file mode 100644 index 9912616a8672..000000000000 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# description: event trigger - test extended error support - - -fail() { #msg - echo $1 - exit_fail -} - -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -echo "Test extended error support" -echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger -! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null -if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then - fail "Failed to generate extended error in histogram" -fi - -exit 0 From 4eab1cc461a6e820eddf88ac63eed98793c96a7c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:23 -0500 Subject: [PATCH 14/44] selftests/ftrace: Add tracing/error_log testcase Add a testcase verifying basic tracing/error_log functionality. Link: http://lkml.kernel.org/r/bf1c0d47a24672df945331462682d96296d1ab28.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/ftrace/tracing-error-log.tc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc new file mode 100644 index 000000000000..021c03fd885d --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - test tracing error log support + +fail() { #msg + echo $1 + exit_fail +} + +# event tracing is currently the only ftrace tracer that uses the +# tracing error_log, hence this check +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +ftrace_errlog_check 'event filter parse error' '((sig >= 10 && sig < 15) || dsig ^== 17) && comm != bash' 'events/signal/signal_generate/filter' + +exit 0 From 26a944917989d57c0dddec284b3088fa31127717 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:24 -0500 Subject: [PATCH 15/44] tracing: Add tracing/error_log Documentation Move most of the hist trigger extended error documentation to ftrace.rst and expand on it to fully document tracing/error_log. Link: http://lkml.kernel.org/r/c5d53c8f643ef6844d6ad8d0200c116936730b01.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/ftrace.rst | 31 +++++++++++++++++++++++++++++++ Documentation/trace/histogram.rst | 16 ++-------------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 7c5e6d6ab5d1..809b39d066ee 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -765,6 +765,37 @@ Here is the list of current tracers that may be configured. tracers from tracing simply echo "nop" into current_tracer. +Error conditions +---------------- + + For most ftrace commands, failure modes are obvious and communicated + using standard return codes. + + For other more involved commands, extended error information may be + available via the tracing/error_log file. For the commands that + support it, reading the tracing/error_log file after an error will + display more detailed information about what went wrong, if + information is available. The tracing/error_log file is a circular + error log displaying a small number (currently, 8) of ftrace errors + for the last (8) failed commands. + + The extended error information and usage takes the form shown in + this example:: + + # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + echo: write error: Invalid argument + + # cat /sys/kernel/debug/tracing/error_log + [ 5348.887237] location: error: Couldn't yyy: zzz + Command: xxx + ^ + [ 7517.023364] location: error: Bad rrr: sss + Command: ppp qqq + ^ + + To clear the error log, echo the empty string into it:: + + # echo > /sys/kernel/debug/tracing/error_log Examples of using the tracer ---------------------------- diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 0ea59d45aef1..7612c7ad5715 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -199,20 +199,8 @@ Extended error information For some error conditions encountered when invoking a hist trigger command, extended error information is available via the - corresponding event's 'hist' file. Reading the hist file after an - error will display more detailed information about what went wrong, - if information is available. This extended error information will - be available until the next hist trigger command for that event. - - If available for a given error condition, the extended error - information and usage takes the following form:: - - # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger - echo: write error: Invalid argument - - # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist - ERROR: Couldn't yyy: zzz - Last command: xxx + tracing/error_log file. See Error Conditions in + :file:`Documentation/trace/ftrace.rst` for details. 6.2 'hist' trigger examples --------------------------- From a8d655792a32312f6715ac789b860fee50168106 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:25 -0500 Subject: [PATCH 16/44] tracing: Add error_log to README Add brief blurb about error_log to the 'Important files' section. Link: http://lkml.kernel.org/r/c81e60f9aded495081231a32d2d1023c4d043a7a.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3d55e9daae8c..2bc18de7f0dc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4702,6 +4702,7 @@ static const char readme_msg[] = " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" " current_tracer\t- function and latency tracers\n" " available_tracers\t- list of configured tracers for current_tracer\n" + " error_log\t- error log for failed commands (that support it)\n" " buffer_size_kb\t- view and modify size of per cpu buffer\n" " buffer_total_size_kb - view total size of all cpu buffers\n\n" " trace_clock\t\t-change the clock used to order events\n" From 163363455b42a1cf833742177149d1352dfe673e Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 26 Mar 2019 20:13:09 +0800 Subject: [PATCH 17/44] tracing: introduce TRACE_EVENT_NOP() Sometimes we want to define a tracepoint as a do-nothing function. So I introduce TRACE_EVENT_NOP, DECLARE_EVENT_CLASS_NOP and DEFINE_EVENT_NOP for this kind of usage. Link: http://lkml.kernel.org/r/1553602391-11926-2-git-send-email-laoar.shao@gmail.com Signed-off-by: Yafang Shao Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 15 +++++++++++++++ include/trace/define_trace.h | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 9c3186578ce0..86b019aa2839 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -548,4 +548,19 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define TRACE_EVENT_PERF_PERM(event, expr...) +#define DECLARE_EVENT_NOP(name, proto, args) \ + static inline void trace_##name(proto) \ + { } \ + static inline bool trace_##name##_enabled(void) \ + { \ + return false; \ + } + +#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) \ + DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) + +#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT_NOP(template, name, proto, args) \ + DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) + #endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index cb30c5532144..bd75f97867b9 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -46,6 +46,12 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef TRACE_EVENT_NOP +#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) + +#undef DEFINE_EVENT_NOP +#define DEFINE_EVENT_NOP(template, name, proto, args) + #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) @@ -102,6 +108,8 @@ #undef TRACE_EVENT_FN #undef TRACE_EVENT_FN_COND #undef TRACE_EVENT_CONDITION +#undef TRACE_EVENT_NOP +#undef DEFINE_EVENT_NOP #undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_FN From 2a09b5de235a6b5f76193a2ed46546a2944f98bf Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 26 Mar 2019 20:13:10 +0800 Subject: [PATCH 18/44] sched/fair: do not expose some tracepoints to user if CONFIG_SCHEDSTATS is not set The tracepoints trace_sched_stat_{iowait, blocked, wait, sleep} should be not exposed to user if CONFIG_SCHEDSTATS is not set. Link: http://lkml.kernel.org/r/1553602391-11926-3-git-send-email-laoar.shao@gmail.com Acked-by: Peter Zijlstra (Intel) Signed-off-by: Yafang Shao Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/sched.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9a4bdfadab07..c8c7c7efb487 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -241,7 +241,6 @@ DECLARE_EVENT_CLASS(sched_process_template, DEFINE_EVENT(sched_process_template, sched_process_free, TP_PROTO(struct task_struct *p), TP_ARGS(p)); - /* * Tracepoint for a task exiting: @@ -336,11 +335,20 @@ TRACE_EVENT(sched_process_exec, __entry->pid, __entry->old_pid) ); + +#ifdef CONFIG_SCHEDSTATS +#define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT +#define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS +#else +#define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT_NOP +#define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS_NOP +#endif + /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ -DECLARE_EVENT_CLASS(sched_stat_template, +DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), @@ -363,12 +371,11 @@ DECLARE_EVENT_CLASS(sched_stat_template, (unsigned long long)__entry->delay) ); - /* * Tracepoint for accounting wait time (time the task is runnable * but not actually running due to scheduler contention). */ -DEFINE_EVENT(sched_stat_template, sched_stat_wait, +DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); @@ -376,7 +383,7 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait, * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ -DEFINE_EVENT(sched_stat_template, sched_stat_sleep, +DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); @@ -384,14 +391,14 @@ DEFINE_EVENT(sched_stat_template, sched_stat_sleep, * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ -DEFINE_EVENT(sched_stat_template, sched_stat_iowait, +DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting blocked time (time the task is in uninterruptible). */ -DEFINE_EVENT(sched_stat_template, sched_stat_blocked, +DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); From 4f5fbd78a7b40bab538ae0d316363530da751e42 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 26 Mar 2019 20:13:11 +0800 Subject: [PATCH 19/44] rcu: validate arguments for rcu tracepoints When CONFIG_RCU_TRACE is not set, all these tracepoints are defined as do-nothing macro. We'd better make those inline functions that take proper arguments. As RCU_TRACE() is defined as do-nothing marco as well when CONFIG_RCU_TRACE is not set, so we can clean it up. Link: http://lkml.kernel.org/r/1553602391-11926-4-git-send-email-laoar.shao@gmail.com Reviewed-by: Paul E. McKenney Signed-off-by: Yafang Shao Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/rcu.h | 81 ++++++++++++-------------------------- kernel/rcu/rcu.h | 9 +---- kernel/rcu/tree.c | 8 ++-- 3 files changed, 31 insertions(+), 67 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index f0c4d10e614b..e3f357b89432 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -7,6 +7,12 @@ #include +#ifdef CONFIG_RCU_TRACE +#define TRACE_EVENT_RCU TRACE_EVENT +#else +#define TRACE_EVENT_RCU TRACE_EVENT_NOP +#endif + /* * Tracepoint for start/end markers used for utilization calculations. * By convention, the string is of the following forms: @@ -35,8 +41,6 @@ TRACE_EVENT(rcu_utilization, TP_printk("%s", __entry->s) ); -#ifdef CONFIG_RCU_TRACE - #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) /* @@ -62,7 +66,7 @@ TRACE_EVENT(rcu_utilization, * "end": End a grace period. * "cpuend": CPU first notices a grace-period end. */ -TRACE_EVENT(rcu_grace_period, +TRACE_EVENT_RCU(rcu_grace_period, TP_PROTO(const char *rcuname, unsigned long gp_seq, const char *gpevent), @@ -101,7 +105,7 @@ TRACE_EVENT(rcu_grace_period, * "Cleanup": Clean up rcu_node structure after previous GP. * "CleanupMore": Clean up, and another GP is needed. */ -TRACE_EVENT(rcu_future_grace_period, +TRACE_EVENT_RCU(rcu_future_grace_period, TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long gp_seq_req, u8 level, int grplo, int grphi, @@ -141,7 +145,7 @@ TRACE_EVENT(rcu_future_grace_period, * rcu_node structure, and the mask of CPUs that will be waited for. * All but the type of RCU are extracted from the rcu_node structure. */ -TRACE_EVENT(rcu_grace_period_init, +TRACE_EVENT_RCU(rcu_grace_period_init, TP_PROTO(const char *rcuname, unsigned long gp_seq, u8 level, int grplo, int grphi, unsigned long qsmask), @@ -186,7 +190,7 @@ TRACE_EVENT(rcu_grace_period_init, * "endwake": Woke piggybackers up. * "done": Someone else did the expedited grace period for us. */ -TRACE_EVENT(rcu_exp_grace_period, +TRACE_EVENT_RCU(rcu_exp_grace_period, TP_PROTO(const char *rcuname, unsigned long gpseq, const char *gpevent), @@ -218,7 +222,7 @@ TRACE_EVENT(rcu_exp_grace_period, * "nxtlvl": Advance to next level of rcu_node funnel * "wait": Wait for someone else to do expedited GP */ -TRACE_EVENT(rcu_exp_funnel_lock, +TRACE_EVENT_RCU(rcu_exp_funnel_lock, TP_PROTO(const char *rcuname, u8 level, int grplo, int grphi, const char *gpevent), @@ -269,7 +273,7 @@ TRACE_EVENT(rcu_exp_funnel_lock, * "WaitQueue": Enqueue partially done, timed wait for it to complete. * "WokeQueue": Partial enqueue now complete. */ -TRACE_EVENT(rcu_nocb_wake, +TRACE_EVENT_RCU(rcu_nocb_wake, TP_PROTO(const char *rcuname, int cpu, const char *reason), @@ -297,7 +301,7 @@ TRACE_EVENT(rcu_nocb_wake, * include SRCU), the grace-period number that the task is blocking * (the current or the next), and the task's PID. */ -TRACE_EVENT(rcu_preempt_task, +TRACE_EVENT_RCU(rcu_preempt_task, TP_PROTO(const char *rcuname, int pid, unsigned long gp_seq), @@ -324,7 +328,7 @@ TRACE_EVENT(rcu_preempt_task, * read-side critical section exiting that critical section. Track the * type of RCU (which one day might include SRCU) and the task's PID. */ -TRACE_EVENT(rcu_unlock_preempted_task, +TRACE_EVENT_RCU(rcu_unlock_preempted_task, TP_PROTO(const char *rcuname, unsigned long gp_seq, int pid), @@ -353,7 +357,7 @@ TRACE_EVENT(rcu_unlock_preempted_task, * whether there are any blocked tasks blocking the current grace period. * All but the type of RCU are extracted from the rcu_node structure. */ -TRACE_EVENT(rcu_quiescent_state_report, +TRACE_EVENT_RCU(rcu_quiescent_state_report, TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long mask, unsigned long qsmask, @@ -396,7 +400,7 @@ TRACE_EVENT(rcu_quiescent_state_report, * state, which can be "dti" for dyntick-idle mode or "kick" when kicking * a CPU that has been in dyntick-idle mode for too long. */ -TRACE_EVENT(rcu_fqs, +TRACE_EVENT_RCU(rcu_fqs, TP_PROTO(const char *rcuname, unsigned long gp_seq, int cpu, const char *qsevent), @@ -436,7 +440,7 @@ TRACE_EVENT(rcu_fqs, * events use two separate counters, and that the "++=" and "--=" events * for irq/NMI will change the counter by two, otherwise by one. */ -TRACE_EVENT(rcu_dyntick, +TRACE_EVENT_RCU(rcu_dyntick, TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks), @@ -468,7 +472,7 @@ TRACE_EVENT(rcu_dyntick, * number of lazy callbacks queued, and the fourth element is the * total number of callbacks queued. */ -TRACE_EVENT(rcu_callback, +TRACE_EVENT_RCU(rcu_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp, long qlen_lazy, long qlen), @@ -504,7 +508,7 @@ TRACE_EVENT(rcu_callback, * the fourth argument is the number of lazy callbacks queued, and the * fifth argument is the total number of callbacks queued. */ -TRACE_EVENT(rcu_kfree_callback, +TRACE_EVENT_RCU(rcu_kfree_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset, long qlen_lazy, long qlen), @@ -539,7 +543,7 @@ TRACE_EVENT(rcu_kfree_callback, * the total number of callbacks queued, and the fourth argument is * the current RCU-callback batch limit. */ -TRACE_EVENT(rcu_batch_start, +TRACE_EVENT_RCU(rcu_batch_start, TP_PROTO(const char *rcuname, long qlen_lazy, long qlen, long blimit), @@ -569,7 +573,7 @@ TRACE_EVENT(rcu_batch_start, * The first argument is the type of RCU, and the second argument is * a pointer to the RCU callback itself. */ -TRACE_EVENT(rcu_invoke_callback, +TRACE_EVENT_RCU(rcu_invoke_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp), @@ -598,7 +602,7 @@ TRACE_EVENT(rcu_invoke_callback, * is the offset of the callback within the enclosing RCU-protected * data structure. */ -TRACE_EVENT(rcu_invoke_kfree_callback, +TRACE_EVENT_RCU(rcu_invoke_kfree_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset), @@ -631,7 +635,7 @@ TRACE_EVENT(rcu_invoke_kfree_callback, * and the sixth argument (risk) is the return value from * rcu_is_callbacks_kthread(). */ -TRACE_EVENT(rcu_batch_end, +TRACE_EVENT_RCU(rcu_batch_end, TP_PROTO(const char *rcuname, int callbacks_invoked, char cb, char nr, char iit, char risk), @@ -673,7 +677,7 @@ TRACE_EVENT(rcu_batch_end, * callback address can be NULL. */ #define RCUTORTURENAME_LEN 8 -TRACE_EVENT(rcu_torture_read, +TRACE_EVENT_RCU(rcu_torture_read, TP_PROTO(const char *rcutorturename, struct rcu_head *rhp, unsigned long secs, unsigned long c_old, unsigned long c), @@ -721,7 +725,7 @@ TRACE_EVENT(rcu_torture_read, * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument * is the count of remaining callbacks, and "done" is the piggybacking count. */ -TRACE_EVENT(rcu_barrier, +TRACE_EVENT_RCU(rcu_barrier, TP_PROTO(const char *rcuname, const char *s, int cpu, int cnt, unsigned long done), @@ -748,41 +752,6 @@ TRACE_EVENT(rcu_barrier, __entry->done) ); -#else /* #ifdef CONFIG_RCU_TRACE */ - -#define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0) -#define trace_rcu_future_grace_period(rcuname, gp_seq, gp_seq_req, \ - level, grplo, grphi, event) \ - do { } while (0) -#define trace_rcu_grace_period_init(rcuname, gp_seq, level, grplo, grphi, \ - qsmask) do { } while (0) -#define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \ - do { } while (0) -#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \ - do { } while (0) -#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) -#define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0) -#define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0) -#define trace_rcu_quiescent_state_report(rcuname, gp_seq, mask, qsmask, level, \ - grplo, grphi, gp_tasks) do { } \ - while (0) -#define trace_rcu_fqs(rcuname, gp_seq, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0) -#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) -#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ - do { } while (0) -#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \ - do { } while (0) -#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) -#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) -#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ - do { } while (0) -#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ - do { } while (0) -#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) - -#endif /* #else #ifdef CONFIG_RCU_TRACE */ - #endif /* _TRACE_RCU_H */ /* This part must be outside protection */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index acee72c0b24b..442ace406ac9 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -11,11 +11,6 @@ #define __LINUX_RCU_H #include -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(stmt) stmt -#else /* #ifdef CONFIG_RCU_TRACE */ -#define RCU_TRACE(stmt) -#endif /* #else #ifdef CONFIG_RCU_TRACE */ /* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ #define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) @@ -216,12 +211,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) rcu_lock_acquire(&rcu_callback_map); if (__is_kfree_rcu_offset(offset)) { - RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);) + trace_rcu_invoke_kfree_callback(rn, head, offset); kfree((void *)head - offset); rcu_lock_release(&rcu_callback_map); return true; } else { - RCU_TRACE(trace_rcu_invoke_callback(rn, head);) + trace_rcu_invoke_callback(rn, head); f = head->func; WRITE_ONCE(head->func, (rcu_callback_t)0L); f(head); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf56faf..906563a1cdea 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2352,14 +2352,14 @@ rcu_check_quiescent_state(struct rcu_data *rdp) */ int rcutree_dying_cpu(unsigned int cpu) { - RCU_TRACE(bool blkd;) - RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(&rcu_data);) - RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) + bool blkd; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp = rdp->mynode; if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) return 0; - RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);) + blkd = !!(rnp->qsmask & rdp->grpmask); trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); return 0; From ee6a6500fe1f5c5a3f18de33fe0178a3c627f6d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 10 Apr 2019 10:45:38 -0400 Subject: [PATCH 20/44] ftrace: Remove ASSIGN_OPS_HASH() macro from ftrace.c The ASSIGN_OPS_HASH() macro was moved to fgraph.c where it was used, but for some reason it wasn't removed from ftrace.c, as it is no longer referenced there. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 26c8ca9bd06b..bf11e0553450 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -69,12 +69,8 @@ #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), #else #define INIT_OPS_HASH(opsname) -#define ASSIGN_OPS_HASH(opsname, val) #endif enum { From 2fa717a0337e7acafda9283c938b635191b8036b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Apr 2019 11:46:13 -0400 Subject: [PATCH 21/44] ftrace: Do not process STUB functions in ftrace_ops_list_func() The function_graph tracer has a stub function and its ops flag has the FTRACE_OPS_FL_STUB set. As the function graph does not use the ftrace_ops->func pointer but instead is called by a separate part of the ftrace trampoline. The function_graph tracer still requires to pass in a ftrace_ops that may also hold the hash of the functions to call. But there's no reason to test that hash in the function tracing portion. Instead of testing to see if we should call the stub function, just test if the ops has FTRACE_OPS_FL_STUB set, and just skip it. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bf11e0553450..433a64f49532 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6260,6 +6260,9 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { + /* Stub functions don't need to be called nor tested */ + if (op->flags & FTRACE_OPS_FL_STUB) + continue; /* * Check the following for each ops before calling their func: * if RCU flag is set, then rcu_is_watching() must be true From 52fde6e70cccc2fcf3f39fed0d0392960e2c2b03 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 21 Apr 2019 19:40:44 -0400 Subject: [PATCH 22/44] function_graph: Have selftest also emulate tr->reset() as it did with tr->init() The function_graph boot up self test emulates the tr->init() function in order to add a wrapper around the function graph tracer entry code to test for lock ups and such. But it does not emulate the tr->reset(), and just calls the function_graph tracer tr->reset() function which will use its own fgraph_ops to unregister function tracing with. As the fgraph_ops is becoming more meaningful with the register_ftrace_graph() and unregister_ftrace_graph() functions, the two need to be the same. The emulated tr->init() uses its own fgraph_ops descriptor, which means the unregister_ftrace_graph() must use the same ftrace_ops, which the selftest currently does not do. By emulating the tr->reset() as the selftest does with the tr->init() it will be able to pass the same fgraph_ops descriptor to the unregister_ftrace_graph() as it did with the register_ftrace_graph(). Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_selftest.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 9d402e7fc949..69ee8ef12cee 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -792,7 +792,10 @@ trace_selftest_startup_function_graph(struct tracer *trace, /* check the trace buffer */ ret = trace_test_buffer(&tr->trace_buffer, &count); - trace->reset(tr); + /* Need to also simulate the tr->reset to remove this fgraph_ops */ + tracing_stop_cmdline_record(); + unregister_ftrace_graph(&fgraph_ops); + tracing_start(); if (!ret && !count) { From e8025bab7bfbd48d262c01c26c15a9d465fbb083 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 24 Apr 2019 12:34:46 -0400 Subject: [PATCH 23/44] function_graph: Place ftrace_graph_entry_stub() prototype in include/linux/ftrace.h ftrace_graph_entry_stub() is defined in generic code, its prototype should be in the generic header and not defined throughout architecture specific code in order to use it. Cc: Greentime Hu Cc: Vincent Chen Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) --- arch/nds32/kernel/ftrace.c | 1 - arch/parisc/kernel/ftrace.c | 1 - include/linux/ftrace.h | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 8a41372551ff..fd2a54b8cd57 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -7,7 +7,6 @@ #ifndef CONFIG_DYNAMIC_FTRACE extern void (*ftrace_trace_function)(unsigned long, unsigned long, struct ftrace_ops*, struct pt_regs*); -extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index e46a4157a894..a28f915993b1 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -51,7 +51,6 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long org_sp_gr3) { extern ftrace_func_t ftrace_trace_function; /* depends on CONFIG_DYNAMIC_FTRACE */ - extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); if (ftrace_trace_function != ftrace_stub) { /* struct ftrace_ops *op, struct pt_regs *regs); */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 730876187344..9b28fce436ca 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -751,6 +751,8 @@ struct ftrace_graph_ret { typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER struct fgraph_ops { From dbfe67334a1767bcb7be8b50bd237b22b272ef23 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 19 Mar 2019 10:12:04 -0700 Subject: [PATCH 24/44] tracing: kdb: The skip_lines parameter should have been skip_entries The things skipped by kdb's "ftdump" command when you pass it a parameter has always been entries, not lines. The difference usually doesn't matter but when the trace buffer has multi-line entries (like a stack dump) it can matter. Let's fix this both in the help text for ftdump and also in the local variable names. Link: http://lkml.kernel.org/r/20190319171206.97107-1-dianders@chromium.org Acked-by: Daniel Thompson Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kdb.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 810d78a8d14c..4b666643d69f 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -17,7 +17,7 @@ #include "trace.h" #include "trace_output.h" -static void ftrace_dump_buf(int skip_lines, long cpu_file) +static void ftrace_dump_buf(int skip_entries, long cpu_file) { /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; @@ -70,11 +70,11 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) kdb_printf("---------------------------------\n"); cnt++; - if (!skip_lines) { + if (!skip_entries) { print_trace_line(&iter); trace_printk_seq(&iter.seq); } else { - skip_lines--; + skip_entries--; } if (KDB_FLAG(CMD_INTERRUPT)) @@ -106,7 +106,7 @@ out: */ static int kdb_ftdump(int argc, const char **argv) { - int skip_lines = 0; + int skip_entries = 0; long cpu_file; char *cp; @@ -114,9 +114,9 @@ static int kdb_ftdump(int argc, const char **argv) return KDB_ARGCOUNT; if (argc) { - skip_lines = simple_strtol(argv[1], &cp, 0); + skip_entries = simple_strtol(argv[1], &cp, 0); if (*cp) - skip_lines = 0; + skip_entries = 0; } if (argc == 2) { @@ -129,7 +129,7 @@ static int kdb_ftdump(int argc, const char **argv) } kdb_trap_printk++; - ftrace_dump_buf(skip_lines, cpu_file); + ftrace_dump_buf(skip_entries, cpu_file); kdb_trap_printk--; return 0; @@ -137,7 +137,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { - kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", + kdb_register_flags("ftdump", kdb_ftdump, "[skip_#entries] [cpu]", "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE); return 0; } From ecffc8a8c7301f6f3c731ba23e38cd049a046416 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 19 Mar 2019 10:12:05 -0700 Subject: [PATCH 25/44] tracing: Add trace_total_entries() / trace_total_entries_cpu() These two new exported functions will be used in a future patch by kdb_ftdump() to quickly skip all but the last few trace entries. Link: http://lkml.kernel.org/r/20190319171206.97107-2-dianders@chromium.org Acked-by: Daniel Thompson Suggested-by: Steven Rostedt Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 65 ++++++++++++++++++++++++++++++++++---------- kernel/trace/trace.h | 3 ++ 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2bc18de7f0dc..dcb9adb44be9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3492,34 +3492,69 @@ static void s_stop(struct seq_file *m, void *p) trace_event_read_unlock(); } +static void +get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, + unsigned long *entries, int cpu) +{ + unsigned long count; + + count = ring_buffer_entries_cpu(buf->buffer, cpu); + /* + * If this buffer has skipped entries, then we hold all + * entries for the trace and we need to ignore the + * ones before the time stamp. + */ + if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { + count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; + /* total is the same as the entries */ + *total = count; + } else + *total = count + + ring_buffer_overrun_cpu(buf->buffer, cpu); + *entries = count; +} + static void get_total_entries(struct trace_buffer *buf, unsigned long *total, unsigned long *entries) { - unsigned long count; + unsigned long t, e; int cpu; *total = 0; *entries = 0; for_each_tracing_cpu(cpu) { - count = ring_buffer_entries_cpu(buf->buffer, cpu); - /* - * If this buffer has skipped entries, then we hold all - * entries for the trace and we need to ignore the - * ones before the time stamp. - */ - if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { - count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; - /* total is the same as the entries */ - *total += count; - } else - *total += count + - ring_buffer_overrun_cpu(buf->buffer, cpu); - *entries += count; + get_total_entries_cpu(buf, &t, &e, cpu); + *total += t; + *entries += e; } } +unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) +{ + unsigned long total, entries; + + if (!tr) + tr = &global_trace; + + get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu); + + return entries; +} + +unsigned long trace_total_entries(struct trace_array *tr) +{ + unsigned long total, entries; + + if (!tr) + tr = &global_trace; + + get_total_entries(&tr->trace_buffer, &total, &entries); + + return entries; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index da00a3d508c1..33f14b9e78b7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -721,6 +721,9 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); +unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu); +unsigned long trace_total_entries(struct trace_array *tr); + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, From 03197fc02b356606355d7ede343b18e3e3737771 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 19 Mar 2019 10:12:06 -0700 Subject: [PATCH 26/44] tracing: kdb: Allow ftdump to skip all but the last few entries The 'ftdump' command in kdb is currently a bit of a last resort, at least if you have lots of traces turned on. It's going to print a whole boatload of data out your serial port which is probably running at 115200. This could easily take many, many minutes. Usually you're most interested in what's at the _end_ of the ftrace buffer, AKA what happened most recently. That means you've got to wait the full time for the dump. The 'ftdump' command does attempt to help you a little bit by allowing you to skip a fixed number of entries. Unfortunately it provides no way for you to know how many entries you should skip. Let's do similar to python and allow you to use a negative number to indicate that you want to skip all entries except the last few. This allows you to quickly see what you want. Note that we also change the printout in ftdump to print the (positive) number of entries actually skipped since that could be helpful to know when you've specified a negative skip count. Link: http://lkml.kernel.org/r/20190319171206.97107-3-dianders@chromium.org Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kdb.c | 45 +++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 4b666643d69f..6c1ae6b752d1 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -17,29 +17,25 @@ #include "trace.h" #include "trace_output.h" +static struct trace_iterator iter; +static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; + static void ftrace_dump_buf(int skip_entries, long cpu_file) { - /* use static because iter can be a bit big for the stack */ - static struct trace_iterator iter; - static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; struct trace_array *tr; unsigned int old_userobj; int cnt = 0, cpu; - trace_init_global_iter(&iter); - iter.buffer_iter = buffer_iter; tr = iter.tr; - for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); - } - old_userobj = tr->trace_flags; /* don't look at user memory in panic mode */ tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ; kdb_printf("Dumping ftrace buffer:\n"); + if (skip_entries) + kdb_printf("(skipping %d entries)\n", skip_entries); /* reset all but tr, trace, and overruns */ memset(&iter.seq, 0, @@ -89,10 +85,6 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file) out: tr->trace_flags = old_userobj; - for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); - } - for_each_tracing_cpu(cpu) { if (iter.buffer_iter[cpu]) { ring_buffer_read_finish(iter.buffer_iter[cpu]); @@ -109,6 +101,8 @@ static int kdb_ftdump(int argc, const char **argv) int skip_entries = 0; long cpu_file; char *cp; + int cnt; + int cpu; if (argc > 2) return KDB_ARGCOUNT; @@ -129,7 +123,29 @@ static int kdb_ftdump(int argc, const char **argv) } kdb_trap_printk++; + + trace_init_global_iter(&iter); + iter.buffer_iter = buffer_iter; + + for_each_tracing_cpu(cpu) { + atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + } + + /* A negative skip_entries means skip all but the last entries */ + if (skip_entries < 0) { + if (cpu_file == RING_BUFFER_ALL_CPUS) + cnt = trace_total_entries(NULL); + else + cnt = trace_total_entries_cpu(NULL, cpu_file); + skip_entries = max(cnt + skip_entries, 0); + } + ftrace_dump_buf(skip_entries, cpu_file); + + for_each_tracing_cpu(cpu) { + atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + } + kdb_trap_printk--; return 0; @@ -138,7 +154,8 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { kdb_register_flags("ftdump", kdb_ftdump, "[skip_#entries] [cpu]", - "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE); + "Dump ftrace log; -skip dumps last #entries", 0, + KDB_ENABLE_ALWAYS_SAFE); return 0; } From 2700fefdb2d9751c416ad56897e27d41e409324a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 30 Nov 2018 12:39:17 -0600 Subject: [PATCH 27/44] x86_64: Add gap to int3 to allow for call emulation To allow an int3 handler to emulate a call instruction, it must be able to push a return address onto the stack. Add a gap to the stack to allow the int3 handler to push the return address and change the return from int3 to jump straight to the emulated called function target. Link: http://lkml.kernel.org/r/20181130183917.hxmti5josgq4clti@treble Link: http://lkml.kernel.org/r/20190502162133.GX2623@hirez.programming.kicks-ass.net [ Note, this is needed to allow Live Kernel Patching to not miss calling a patched function when tracing is enabled. -- Steven Rostedt ] Cc: stable@vger.kernel.org Fixes: b700e7f03df5 ("livepatch: kernel: add support for live patching") Tested-by: Nicolai Stange Reviewed-by: Nicolai Stange Reviewed-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (VMware) --- arch/x86/entry/entry_64.S | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b629..27fcc6fbdd52 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -899,6 +899,20 @@ ENTRY(\sym) jnz .Lfrom_usermode_switch_stack_\@ .endif + .if \create_gap == 1 + /* + * If coming from kernel space, create a 6-word gap to allow the + * int3 handler to emulate a call instruction. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_no_gap_\@ + .rept 6 + pushq 5*8(%rsp) + .endr + UNWIND_HINT_IRET_REGS offset=8 +.Lfrom_usermode_no_gap_\@: + .endif + .if \paranoid call paranoid_entry .else @@ -1130,7 +1144,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 +idtentry int3 do_int3 has_error_code=0 create_gap=1 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN_PV From 4b33dadf37666c0860b88f9e52a16d07bf6d0b03 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 May 2019 15:11:17 +0200 Subject: [PATCH 28/44] x86_64: Allow breakpoints to emulate call instructions In order to allow breakpoints to emulate call instructions, they need to push the return address onto the stack. The x86_64 int3 handler adds a small gap to allow the stack to grow some. Use this gap to add the return address to be able to emulate a call instruction at the breakpoint location. These helper functions are added: int3_emulate_jmp(): changes the location of the regs->ip to return there. (The next two are only for x86_64) int3_emulate_push(): to push the address onto the gap in the stack int3_emulate_call(): push the return address and change regs->ip Cc: Andy Lutomirski Cc: Nicolai Stange Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: the arch/x86 maintainers Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Joe Lawrence Cc: Shuah Khan Cc: Konrad Rzeszutek Wilk Cc: Tim Chen Cc: Sebastian Andrzej Siewior Cc: Mimi Zohar Cc: Juergen Gross Cc: Nick Desaulniers Cc: Nayna Jain Cc: Masahiro Yamada Cc: Joerg Roedel Cc: "open list:KERNEL SELFTEST FRAMEWORK" Cc: stable@vger.kernel.org Fixes: b700e7f03df5 ("livepatch: kernel: add support for live patching") Tested-by: Nicolai Stange Reviewed-by: Nicolai Stange Reviewed-by: Masami Hiramatsu Signed-off-by: Peter Zijlstra (Intel) [ Modified to only work for x86_64 and added comment to int3_emulate_push() ] Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/text-patching.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index e85ff65c43c3..05861cc08787 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -39,4 +39,32 @@ extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; +static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + +#define INT3_INSN_SIZE 1 +#define CALL_INSN_SIZE 5 + +#ifdef CONFIG_X86_64 +static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) +{ + /* + * The int3 handler in entry_64.S adds a gap between the + * stack where the break point happened, and the saving of + * pt_regs. We can extend the original stack because of + * this gap. See the idtentry macro's create_gap option. + */ + regs->sp -= sizeof(unsigned long); + *(unsigned long *)regs->sp = val; +} + +static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) +{ + int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); + int3_emulate_jmp(regs, func); +} +#endif + #endif /* _ASM_X86_TEXT_PATCHING_H */ From 9e298e8604088a600d8100a111a532a9d342af09 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 May 2019 15:11:17 +0200 Subject: [PATCH 29/44] ftrace/x86_64: Emulate call function while updating in breakpoint handler Nicolai Stange discovered[1] that if live kernel patching is enabled, and the function tracer started tracing the same function that was patched, the conversion of the fentry call site during the translation of going from calling the live kernel patch trampoline to the iterator trampoline, would have as slight window where it didn't call anything. As live kernel patching depends on ftrace to always call its code (to prevent the function being traced from being called, as it will redirect it). This small window would allow the old buggy function to be called, and this can cause undesirable results. Nicolai submitted new patches[2] but these were controversial. As this is similar to the static call emulation issues that came up a while ago[3]. But after some debate[4][5] adding a gap in the stack when entering the breakpoint handler allows for pushing the return address onto the stack to easily emulate a call. [1] http://lkml.kernel.org/r/20180726104029.7736-1-nstange@suse.de [2] http://lkml.kernel.org/r/20190427100639.15074-1-nstange@suse.de [3] http://lkml.kernel.org/r/3cf04e113d71c9f8e4be95fb84a510f085aa4afa.1541711457.git.jpoimboe@redhat.com [4] http://lkml.kernel.org/r/CAHk-=wh5OpheSU8Em_Q3Hg8qw_JtoijxOdPtHru6d+5K8TWM=A@mail.gmail.com [5] http://lkml.kernel.org/r/CAHk-=wjvQxY4DvPrJ6haPgAa6b906h=MwZXO6G8OtiTGe=N7_w@mail.gmail.com [ Live kernel patching is not implemented on x86_32, thus the emulate calls are only for x86_64. ] Cc: Andy Lutomirski Cc: Nicolai Stange Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: the arch/x86 maintainers Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Joe Lawrence Cc: Shuah Khan Cc: Konrad Rzeszutek Wilk Cc: Tim Chen Cc: Sebastian Andrzej Siewior Cc: Mimi Zohar Cc: Juergen Gross Cc: Nick Desaulniers Cc: Nayna Jain Cc: Masahiro Yamada Cc: Joerg Roedel Cc: "open list:KERNEL SELFTEST FRAMEWORK" Cc: stable@vger.kernel.org Fixes: b700e7f03df5 ("livepatch: kernel: add support for live patching") Tested-by: Nicolai Stange Reviewed-by: Nicolai Stange Reviewed-by: Masami Hiramatsu Signed-off-by: Peter Zijlstra (Intel) [ Changed to only implement emulated calls for x86_64 ] Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/ftrace.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ef49517f6bb2..bd553b3af22e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_DYNAMIC_FTRACE @@ -231,6 +232,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } static unsigned long ftrace_update_func; +static unsigned long ftrace_update_func_call; static int update_ftrace_func(unsigned long ip, void *new) { @@ -259,6 +261,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned char *new; int ret; + ftrace_update_func_call = (unsigned long)func; + new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -294,13 +298,28 @@ int ftrace_int3_handler(struct pt_regs *regs) if (WARN_ON_ONCE(!regs)) return 0; - ip = regs->ip - 1; - if (!ftrace_location(ip) && !is_ftrace_caller(ip)) - return 0; + ip = regs->ip - INT3_INSN_SIZE; - regs->ip += MCOUNT_INSN_SIZE - 1; +#ifdef CONFIG_X86_64 + if (ftrace_location(ip)) { + int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); + return 1; + } else if (is_ftrace_caller(ip)) { + if (!ftrace_update_func_call) { + int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); + return 1; + } + int3_emulate_call(regs, ftrace_update_func_call); + return 1; + } +#else + if (ftrace_location(ip) || is_ftrace_caller(ip)) { + int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); + return 1; + } +#endif - return 1; + return 0; } NOKPROBE_SYMBOL(ftrace_int3_handler); @@ -859,6 +878,8 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) func = ftrace_ops_get_func(ops); + ftrace_update_func_call = (unsigned long)func; + /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -960,6 +981,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func) { unsigned char *new; + ftrace_update_func_call = 0UL; new = ftrace_jmp_replace(ip, (unsigned long)func); return update_ftrace_func(ip, new); From 4dd537aca25dd2e0e8aca8b8923930cbe6240003 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 May 2019 22:55:41 +0900 Subject: [PATCH 30/44] tracing: uprobes: Re-enable $comm support for uprobe events Since commit 533059281ee5 ("tracing: probeevent: Introduce new argument fetching code") dropped the $comm support from uprobe events, this re-enables it. For $comm support, uses strlcpy() instead of strncpy_from_user() to copy current task's comm. Because it is in the kernel space, strncpy_from_user() always fails to copy the comm. This also uses strlen() instead of strnlen_user() to measure the length of the comm. Note that this uses -ECOMM as a token value to fetch the comm string. If the user-space pointer points -ECOMM, it will be translated to task->comm. Link: http://lkml.kernel.org/r/155723734162.9149.4042756162201097965.stgit@devnote2 Fixes: 533059281ee5 ("tracing: probeevent: Introduce new argument fetching code") Reported-by: Andreas Ziegler Acked-by: Andreas Ziegler Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.h | 1 + kernel/trace/trace_uprobe.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index b7737666c1a8..f9a8c632188b 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -124,6 +124,7 @@ struct fetch_insn { /* fetch + deref*N + store + mod + end <= 16, this allows N=12, enough */ #define FETCH_INSN_MAX 16 +#define FETCH_TOKEN_COMM (-ECOMM) /* Fetch type information table */ struct fetch_type { diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index cd8750a72768..eb7e06b54741 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -156,7 +156,10 @@ fetch_store_string(unsigned long addr, void *dest, void *base) if (unlikely(!maxlen)) return -ENOMEM; - ret = strncpy_from_user(dst, src, maxlen); + if (addr == FETCH_TOKEN_COMM) + ret = strlcpy(dst, current->comm, maxlen); + else + ret = strncpy_from_user(dst, src, maxlen); if (ret >= 0) { if (ret == maxlen) dst[ret - 1] = '\0'; @@ -180,7 +183,10 @@ fetch_store_strlen(unsigned long addr) int len; void __user *vaddr = (void __force __user *) addr; - len = strnlen_user(vaddr, MAX_STRING_SIZE); + if (addr == FETCH_TOKEN_COMM) + len = strlen(current->comm) + 1; + else + len = strnlen_user(vaddr, MAX_STRING_SIZE); return (len > MAX_STRING_SIZE) ? 0 : len; } @@ -220,6 +226,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_IMM: val = code->immediate; break; + case FETCH_OP_COMM: + val = FETCH_TOKEN_COMM; + break; case FETCH_OP_FOFFS: val = translate_user_vaddr(code->immediate); break; From 489fe0096b19b664b8f3bed0fd604d617a229b5a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 May 2019 22:55:52 +0900 Subject: [PATCH 31/44] tracing: probeevent: Do not accumulate on ret variable Do not accumulate strlen result on "ret" local variable, because it is accumulated on "total" local variable for array case. Link: http://lkml.kernel.org/r/155723735237.9149.3192150444705457531.stgit@devnote2 Fixes: 40b53b771806 ("tracing: probeevent: Add array type support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 4737bb8c07a3..c30c61f12ddd 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -88,7 +88,7 @@ stage3: /* 3rd stage: store value to buffer */ if (unlikely(!dest)) { if (code->op == FETCH_OP_ST_STRING) { - ret += fetch_store_strlen(val + code->offset); + ret = fetch_store_strlen(val + code->offset); code++; goto array; } else From 3dd1f7f24f8ceec00bbbc364c2ac3c893f0fdc4c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 May 2019 22:56:02 +0900 Subject: [PATCH 32/44] tracing: probeevent: Fix to make the type of $comm string Fix to make the type of $comm "string". If we set the other type to $comm argument, it shows meaningless value or wrong data. Currently probe events allow us to set string array type (e.g. ":string[2]"), or other digit types like x8 on $comm. But since clearly $comm is just a string data, it should not be fetched by other types including array. Link: http://lkml.kernel.org/r/155723736241.9149.14582064184468574539.stgit@devnote2 Cc: Andreas Ziegler Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 533059281ee5 ("tracing: probeevent: Introduce new argument fetching code") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 4cc2d467d34c..e0d1d5353464 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -533,13 +533,14 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } } } - /* - * The default type of $comm should be "string", and it can't be - * dereferenced. - */ - if (!t && strcmp(arg, "$comm") == 0) + + /* Since $comm can not be dereferred, we can find $comm by strcmp */ + if (strcmp(arg, "$comm") == 0) { + /* The type of $comm must be "string", and not an array. */ + if (parg->count || (t && strcmp(t, "string"))) + return -EINVAL; parg->type = find_fetch_type("string"); - else + } else parg->type = find_fetch_type(t); if (!parg->type) { trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE); From 5c173bedb24dfcc8e412d3c3f111c504e4408dd5 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 1 Nov 2018 11:46:40 -0400 Subject: [PATCH 33/44] ring-buffer: Fix mispelling of Calculate It's not "Caculate". Link: http://lkml.kernel.org/r/20181101154640.23162-1-tiny.windzz@gmail.com Signed-off-by: Yangtao Li Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index ffba6789c0e2..0564f6db0561 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -362,7 +362,7 @@ static void ring_buffer_producer(void) hit--; /* make it non zero */ } - /* Caculate the average time in nanosecs */ + /* Calculate the average time in nanosecs */ avg = NSEC_PER_MSEC / (hit + missed); trace_printk("%ld ns per entry\n", avg); } From 0f5e5a3ab7fa1c09370a4d709ad6157457d5b8b6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Mar 2019 09:17:57 +0100 Subject: [PATCH 34/44] tracing: Eliminate const char[] auto variables Automatic const char[] variables cause unnecessary code generation. For example, the this_mod variable leads to 3f04: 48 b8 5f 5f 74 68 69 73 5f 6d movabs $0x6d5f736968745f5f,%rax # __this_m 3f0e: 4c 8d 44 24 02 lea 0x2(%rsp),%r8 3f13: 48 8d 7c 24 10 lea 0x10(%rsp),%rdi 3f18: 48 89 44 24 02 mov %rax,0x2(%rsp) 3f1d: 4c 89 e9 mov %r13,%rcx 3f20: b8 65 00 00 00 mov $0x65,%eax # e 3f25: 48 c7 c2 00 00 00 00 mov $0x0,%rdx 3f28: R_X86_64_32S .rodata.str1.1+0x18d 3f2c: be 48 00 00 00 mov $0x48,%esi 3f31: c7 44 24 0a 6f 64 75 6c movl $0x6c75646f,0xa(%rsp) # odul 3f39: 66 89 44 24 0e mov %ax,0xe(%rsp) i.e., the string gets built on the stack at runtime. Similar code can be found for the other instances I'm replacing here. Putting the string in .rodata reduces the combined .text+.rodata size and saves time and stack space at runtime. The simplest fix, and what I've done for the this_mod case, is to just make the variable static. However, for the "" case where the same string is used twice, that prevents the linker from merging those two literals, so instead use a macro - that also keeps the two instances automatically in sync (instead of only the compile-time strlen expression). Finally, for the two runs of spaces, it turns out that the "build these strings on the stack" is not the worst part of what gcc does - it turns print_func_help_header_irq() into "if (tgid) { /* print_event_info + five seq_printf calls */ } else { /* print event_info + another five seq_printf */}". Taking inspiration from a suggestion from Al Viro, use %.*s to make snprintf either stop after the first two spaces or print the whole string. As a bonus, the seq_printfs now fit on single lines (at least, they are not longer than the existing ones in the function just above), making it easier to see that the ascii art lines up. x86-64 defconfig + CONFIG_FUNCTION_TRACER: $ scripts/stackdelta /tmp/stackusage.{0,1} ./kernel/trace/ftrace.c ftrace_mod_callback 152 136 -16 ./kernel/trace/trace.c trace_default_header 56 32 -24 ./kernel/trace/trace.c tracing_mark_raw_write 96 72 -24 ./kernel/trace/trace.c tracing_mark_write 104 80 -24 bloat-o-meter add/remove: 1/0 grow/shrink: 0/4 up/down: 14/-375 (-361) Function old new delta this_mod - 14 +14 ftrace_mod_callback 577 542 -35 tracing_mark_raw_write 444 374 -70 tracing_mark_write 616 540 -76 trace_default_header 600 406 -194 Link: http://lkml.kernel.org/r/20190320081757.6037-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.c | 34 +++++++++++++--------------------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 433a64f49532..7765a53f1006 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3875,7 +3875,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, static bool module_exists(const char *module) { /* All modules have the symbol __this_module */ - const char this_mod[] = "__this_module"; + static const char this_mod[] = "__this_module"; char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; unsigned long val; int n; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dcb9adb44be9..3259019cc66d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3593,25 +3593,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char tgid_space[] = " "; - const char space[] = " "; + const char *space = " "; + int prec = tgid ? 10 : 2; print_event_info(buf, m); - seq_printf(m, "# %s _-----=> irqs-off\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s / _----=> need-resched\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s| / _---=> hardirq/softirq\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s|| / _--=> preempt-depth\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s||| / delay\n", - tgid ? tgid_space : space); - seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n", - tgid ? " TGID " : space); - seq_printf(m, "# | | %s | |||| | |\n", - tgid ? " | " : space); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); } void @@ -6342,13 +6335,13 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, struct ring_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; - const char faulted[] = ""; ssize_t written; int size; int len; /* Used in tracing_mark_raw_write() as well */ -#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */ +#define FAULTED_STR "" +#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */ if (tracing_disabled) return -EINVAL; @@ -6380,7 +6373,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); if (len) { - memcpy(&entry->buf, faulted, FAULTED_SIZE); + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); cnt = FAULTED_SIZE; written = -EFAULT; } else @@ -6421,7 +6414,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct ring_buffer *buffer; struct raw_data_entry *entry; - const char faulted[] = ""; unsigned long irq_flags; ssize_t written; int size; @@ -6461,7 +6453,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); if (len) { entry->id = -1; - memcpy(&entry->buf, faulted, FAULTED_SIZE); + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); written = -EFAULT; } else written = cnt; From bfcd631eb6de474d8e097fd0f9f840fdf7272a1d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Nov 2018 13:23:12 +0000 Subject: [PATCH 35/44] tracing: Fix white space issues in parse_pred() function Trivial fix to clean up an indentation issue, a whole chunk of code has an extra space in the indentation. Link: http://lkml.kernel.org/r/20181109132312.20994-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 180ecb390baa..d3e59312ef40 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1222,30 +1222,30 @@ static int parse_pred(const char *str, void *data, * (perf doesn't use it) and grab everything. */ if (strcmp(field->name, "ip") != 0) { - parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); - goto err_free; - } - pred->fn = filter_pred_none; + parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); + goto err_free; + } + pred->fn = filter_pred_none; - /* - * Quotes are not required, but if they exist then we need - * to read them till we hit a matching one. - */ - if (str[i] == '\'' || str[i] == '"') - q = str[i]; - else - q = 0; + /* + * Quotes are not required, but if they exist then we need + * to read them till we hit a matching one. + */ + if (str[i] == '\'' || str[i] == '"') + q = str[i]; + else + q = 0; - for (i++; str[i]; i++) { - if (q && str[i] == q) - break; - if (!q && (str[i] == ')' || str[i] == '&' || - str[i] == '|')) - break; - } - /* Skip quotes */ - if (q) - s++; + for (i++; str[i]; i++) { + if (q && str[i] == q) + break; + if (!q && (str[i] == ')' || str[i] == '&' || + str[i] == '|')) + break; + } + /* Skip quotes */ + if (q) + s++; len = i - s; if (len >= MAX_FILTER_STR_VAL) { parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); From 6fc2171c5c03672bae71d04a0f5fa88cc9c3b4e2 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 30 Nov 2018 15:56:22 +0100 Subject: [PATCH 36/44] tracing: Allow RCU to run between postponed startup tests When building a allmodconfig kernel for arm64 and boot that in qemu, CONFIG_FTRACE_STARTUP_TEST gets enabled and that takes time so the watchdog expires and prints out a message like this: 'watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:1]' Depending on what the what test gets called from init_trace_selftests() it stays minutes in the loop. Rework so that function cond_resched() gets called in the init_trace_selftests loop. Link: http://lkml.kernel.org/r/20181130145622.26334-1-anders.roxell@linaro.org Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3259019cc66d..4269af5905e4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1722,6 +1722,10 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); list_for_each_entry_safe(p, n, &postponed_selftests, list) { + /* This loop can take minutes when sanitizers are enabled, so + * lets make sure we allow RCU processing. + */ + cond_resched(); ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ if (ret < 0) { From cbe08bcbbe787315c425dde284dcb715cfbf3f39 Mon Sep 17 00:00:00 2001 From: Elazar Leibovich Date: Mon, 31 Dec 2018 13:58:37 +0200 Subject: [PATCH 37/44] tracing: Fix partial reading of trace event's id file When reading only part of the id file, the ppos isn't tracked correctly. This is taken care by simple_read_from_buffer. Reading a single byte, and then the next byte would result EOF. While this seems like not a big deal, this breaks abstractions that reads information from files unbuffered. See for example https://github.com/golang/go/issues/29399 This code was mentioned as problematic in commit cd458ba9d5a5 ("tracing: Do not (ab)use trace_seq in event_id_read()") An example C code that show this bug is: #include #include #include #include #include #include int main(int argc, char **argv) { if (argc < 2) return 1; int fd = open(argv[1], O_RDONLY); char c; read(fd, &c, 1); printf("First %c\n", c); read(fd, &c, 1); printf("Second %c\n", c); } Then run with, e.g. sudo ./a.out /sys/kernel/debug/tracing/events/tcp/tcp_set_state/id You'll notice you're getting the first character twice, instead of the first two characters in the id file. Link: http://lkml.kernel.org/r/20181231115837.4932-1-elazar@lightbitslabs.com Cc: Orit Wasserman Cc: Oleg Nesterov Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 23725aeeab10b ("ftrace: provide an id file for each event") Signed-off-by: Elazar Leibovich Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 81c038ed6cee..0ce3db67f556 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1319,9 +1319,6 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) char buf[32]; int len; - if (*ppos) - return 0; - if (unlikely(!id)) return -ENODEV; From 8623b00676f16ed8972008095deca2c8e2b97a37 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Jan 2019 22:34:08 -0600 Subject: [PATCH 38/44] tracing: Replace kzalloc with kcalloc Replace kzalloc() function with its 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a, b, gfp) This code was detected with the help of Coccinelle. Link: http://lkml.kernel.org/r/20190115043408.GA23456@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e0d1d5353464..a347faced959 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -558,7 +558,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, parg->count); } - code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL); + code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL); if (!code) return -ENOMEM; code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; @@ -637,7 +637,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, code->op = FETCH_OP_END; /* Shrink down the code buffer */ - parg->code = kzalloc(sizeof(*code) * (code - tmp + 1), GFP_KERNEL); + parg->code = kcalloc(code - tmp + 1, sizeof(*code), GFP_KERNEL); if (!parg->code) ret = -ENOMEM; else From b9416997603ef7e17d4de10b6408f19da2feb72c Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat (VMware)" Date: Mon, 28 Jan 2019 17:55:53 -0800 Subject: [PATCH 39/44] tracing: Fix documentation about disabling options using trace_options To disable a tracing option using the trace_options file, the option name needs to be prefixed with 'no', and not suffixed, as the README states. Fix it. Link: http://lkml.kernel.org/r/154872690031.47356.5739053380942044586.stgit@srivatsa-ubuntu Signed-off-by: Srivatsa S. Bhat (VMware) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4269af5905e4..a3a6945a7732 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4755,7 +4755,7 @@ static const char readme_msg[] = " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" "\t\t\t Remove sub-buffer with rmdir\n" " trace_options\t\t- Set format or modify how tracing happens\n" - "\t\t\t Disable an option by adding a suffix 'no' to the\n" + "\t\t\t Disable an option by prefixing 'no' to the\n" "\t\t\t option name\n" " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" #ifdef CONFIG_DYNAMIC_FTRACE From a15fd609ad53a631a927c6680e8fb606f42a712b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 20 Mar 2019 10:26:17 -0700 Subject: [PATCH 40/44] tracing: Simplify "if" macro code Peter Zijlstra noticed that with CONFIG_PROFILE_ALL_BRANCHES, the "if" macro converts the conditional to an array index. This can cause GCC to create horrible code. When there are nested ifs, the generated code uses register values to encode branching decisions. Josh Poimboeuf found that replacing the define "if" macro from using the condition as an array index and incrementing the branch statics with an if statement itself, reduced the asm complexity and shrinks the generated code quite a bit. But this can be simplified even further by replacing the internal if statement with a ternary operator. Link: https://lkml.kernel.org/r/20190307174802.46fmpysxyo35hh43@treble Link: http://lkml.kernel.org/r/CAHk-=wiALN3jRuzARpwThN62iKd476Xj-uom+YnLZ4=eqcz7xQ@mail.gmail.com Reported-by: Peter Zijlstra (Intel) Reported-by: Josh Poimboeuf Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (VMware) --- include/linux/compiler.h | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 445348facea9..8aaf7cd026b0 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -53,23 +53,24 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * "Define 'is'", Bill Clinton * "Define 'if'", Steven Rostedt */ -#define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) ) -#define __trace_if(cond) \ - if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ - ({ \ - int ______r; \ - static struct ftrace_branch_data \ - __aligned(4) \ - __section("_ftrace_branch") \ - ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ - }; \ - ______r = !!(cond); \ - ______f.miss_hit[______r]++; \ - ______r; \ - })) +#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) ) + +#define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond)) + +#define __trace_if_value(cond) ({ \ + static struct ftrace_branch_data \ + __aligned(4) \ + __section("_ftrace_branch") \ + __if_trace = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + (cond) ? \ + (__if_trace.miss_hit[1]++,1) : \ + (__if_trace.miss_hit[0]++,0); \ +}) + #endif /* CONFIG_PROFILE_ALL_BRANCHES */ #else From 518049d9d3e25dcd7be6e3d728e86f292ad0c922 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 10 May 2019 12:05:46 -0400 Subject: [PATCH 41/44] ftrace/x86_32: Remove support for non DYNAMIC_FTRACE When DYNAMIC_FTRACE is enabled in the kernel, all the functions that can be traced by the function tracer have a "nop" placeholder at the start of the function. When function tracing is enabled, the nop is converted into a call to the tracing infrastructure where the functions get traced. This also allows for specifying specific functions to trace, and a lot of infrastructure is built on top of this. When DYNAMIC_FTRACE is not enabled, all the functions have a call to the ftrace trampoline. A check is made to see if a function pointer is the ftrace_stub or not, and if it is not, it calls the function pointer to trace the code. This adds over 10% overhead to the kernel even when tracing is disabled. When an architecture supports DYNAMIC_FTRACE there really is no reason to use the static tracing. I have kept non DYNAMIC_FTRACE available in x86 so that the generic code for non DYNAMIC_FTRACE can be tested. There is no reason to support non DYNAMIC_FTRACE for both x86_64 and x86_32. As the non DYNAMIC_FTRACE for x86_32 does not even support fentry, and we want to remove mcount completely, there's no reason to keep non DYNAMIC_FTRACE around for x86_32. Signed-off-by: Steven Rostedt (VMware) --- arch/x86/Kconfig | 11 +++++++++++ arch/x86/kernel/ftrace_32.S | 39 ------------------------------------- 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19..0544041ae3a2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -31,6 +31,17 @@ config X86_64 select X86_DEV_DMA_OPS select ARCH_HAS_SYSCALL_WRAPPER +config FORCE_DYNAMIC_FTRACE + def_bool y + depends on X86_32 + depends on FUNCTION_TRACER + select DYNAMIC_FTRACE + help + We keep the static function tracing (!DYNAMIC_FTRACE) around + in order to test the non static function tracing in the + generic code, as other architectures still use it. But we + only need to keep it around for x86_64. No need to keep it + for x86_32. For x86_32, force DYNAMIC_FTRACE. # # Arch settings # diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 4c8440de3355..459e6b4a19bc 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -18,8 +18,6 @@ EXPORT_SYMBOL(__fentry__) EXPORT_SYMBOL(mcount) #endif -#ifdef CONFIG_DYNAMIC_FTRACE - /* mcount uses a frame pointer even if CONFIG_FRAME_POINTER is not set */ #if !defined(CC_USING_FENTRY) || defined(CONFIG_FRAME_POINTER) # define USING_FRAME_POINTER @@ -170,43 +168,6 @@ GLOBAL(ftrace_regs_call) lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */ jmp .Lftrace_ret -#else /* ! CONFIG_DYNAMIC_FTRACE */ - -ENTRY(function_hook) - cmpl $__PAGE_OFFSET, %esp - jb ftrace_stub /* Paging not enabled yet? */ - - cmpl $ftrace_stub, ftrace_trace_function - jnz .Ltrace -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - cmpl $ftrace_stub, ftrace_graph_return - jnz ftrace_graph_caller - - cmpl $ftrace_graph_entry_stub, ftrace_graph_entry - jnz ftrace_graph_caller -#endif -.globl ftrace_stub -ftrace_stub: - ret - - /* taken from glibc */ -.Ltrace: - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - movl 0x4(%ebp), %edx - subl $MCOUNT_INSN_SIZE, %eax - - movl ftrace_trace_function, %ecx - CALL_NOSPEC %ecx - - popl %edx - popl %ecx - popl %eax - jmp ftrace_stub -END(function_hook) -#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) From 562e14f72292249e52e6346a9e3a30be652b0cf6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 9 May 2019 15:32:05 -0400 Subject: [PATCH 42/44] ftrace/x86: Remove mcount support There's two methods of enabling function tracing in Linux on x86. One is with just "gcc -pg" and the other is "gcc -pg -mfentry". The former will use calls to a special function "mcount" after the frame is set up in all C functions. The latter will add calls to a special function called "fentry" as the very first instruction of all C functions. At compile time, there is a check to see if gcc supports, -mfentry, and if it does, it will use that, because it is more versatile and less error prone for function tracing. Starting with v4.19, the minimum gcc supported to build the Linux kernel, was raised to version 4.6. That also happens to be the first gcc version to support -mfentry. Since on x86, using gcc versions from 4.6 and beyond will unconditionally enable the -mfentry, it will no longer use mcount as the method for inserting calls into the C functions of the kernel. This means that there is no point in continuing to maintain mcount in x86. Remove support for using mcount. This makes the code less complex, and will also allow it to be simplified in the future. Acked-by: Peter Zijlstra (Intel) Acked-by: Jiri Kosina Acked-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/ftrace.h | 8 +++---- arch/x86/include/asm/livepatch.h | 3 --- arch/x86/kernel/ftrace_32.S | 36 +++++--------------------------- arch/x86/kernel/ftrace_64.S | 28 +------------------------ 4 files changed, 9 insertions(+), 66 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index cf350639e76d..287f1f7b2e52 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,12 +3,10 @@ #define _ASM_X86_FTRACE_H #ifdef CONFIG_FUNCTION_TRACER -#ifdef CC_USING_FENTRY -# define MCOUNT_ADDR ((unsigned long)(__fentry__)) -#else -# define MCOUNT_ADDR ((unsigned long)(mcount)) -# define HAVE_FUNCTION_GRAPH_FP_TEST +#ifndef CC_USING_FENTRY +# error Compiler does not support fentry? #endif +# define MCOUNT_ADDR ((unsigned long)(__fentry__)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index ed80003ce3e2..2f2bdf0662f8 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -26,9 +26,6 @@ static inline int klp_check_compiler_support(void) { -#ifndef CC_USING_FENTRY - return 1; -#endif return 0; } diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 459e6b4a19bc..2ba914a34b06 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -10,20 +10,10 @@ #include #include -#ifdef CC_USING_FENTRY # define function_hook __fentry__ EXPORT_SYMBOL(__fentry__) -#else -# define function_hook mcount -EXPORT_SYMBOL(mcount) -#endif -/* mcount uses a frame pointer even if CONFIG_FRAME_POINTER is not set */ -#if !defined(CC_USING_FENTRY) || defined(CONFIG_FRAME_POINTER) -# define USING_FRAME_POINTER -#endif - -#ifdef USING_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER # define MCOUNT_FRAME 1 /* using frame = true */ #else # define MCOUNT_FRAME 0 /* using frame = false */ @@ -35,8 +25,7 @@ END(function_hook) ENTRY(ftrace_caller) -#ifdef USING_FRAME_POINTER -# ifdef CC_USING_FENTRY +#ifdef CONFIG_FRAME_POINTER /* * Frame pointers are of ip followed by bp. * Since fentry is an immediate jump, we are left with @@ -47,7 +36,7 @@ ENTRY(ftrace_caller) pushl %ebp movl %esp, %ebp pushl 2*4(%esp) /* function ip */ -# endif + /* For mcount, the function ip is directly above */ pushl %ebp movl %esp, %ebp @@ -57,7 +46,7 @@ ENTRY(ftrace_caller) pushl %edx pushl $0 /* Pass NULL as regs pointer */ -#ifdef USING_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER /* Load parent ebp into edx */ movl 4*4(%esp), %edx #else @@ -80,13 +69,11 @@ ftrace_call: popl %edx popl %ecx popl %eax -#ifdef USING_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER popl %ebp -# ifdef CC_USING_FENTRY addl $4,%esp /* skip function ip */ popl %ebp /* this is the orig bp */ addl $4, %esp /* skip parent ip */ -# endif #endif .Lftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -131,11 +118,7 @@ ENTRY(ftrace_regs_caller) movl 12*4(%esp), %eax /* Load ip (1st parameter) */ subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ -#ifdef CC_USING_FENTRY movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */ -#else - movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ -#endif movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ pushl %esp /* Save pt_regs as 4th parameter */ @@ -176,13 +159,8 @@ ENTRY(ftrace_graph_caller) pushl %edx movl 3*4(%esp), %eax /* Even with frame pointers, fentry doesn't have one here */ -#ifdef CC_USING_FENTRY lea 4*4(%esp), %edx movl $0, %ecx -#else - lea 0x4(%ebp), %edx - movl (%ebp), %ecx -#endif subl $MCOUNT_INSN_SIZE, %eax call prepare_ftrace_return popl %edx @@ -195,11 +173,7 @@ END(ftrace_graph_caller) return_to_handler: pushl %eax pushl %edx -#ifdef CC_USING_FENTRY movl $0, %eax -#else - movl %ebp, %eax -#endif call ftrace_return_to_handler movl %eax, %ecx popl %edx diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 75f2b36b41a6..10eb2760ef2c 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -13,22 +13,12 @@ .code64 .section .entry.text, "ax" -#ifdef CC_USING_FENTRY # define function_hook __fentry__ EXPORT_SYMBOL(__fentry__) -#else -# define function_hook mcount -EXPORT_SYMBOL(mcount) -#endif #ifdef CONFIG_FRAME_POINTER -# ifdef CC_USING_FENTRY /* Save parent and function stack frames (rip and rbp) */ # define MCOUNT_FRAME_SIZE (8+16*2) -# else -/* Save just function stack frame (rip and rbp) */ -# define MCOUNT_FRAME_SIZE (8+16) -# endif #else /* No need to save a stack frame */ # define MCOUNT_FRAME_SIZE 0 @@ -75,17 +65,13 @@ EXPORT_SYMBOL(mcount) * fentry is called before the stack frame is set up, where as mcount * is called afterward. */ -#ifdef CC_USING_FENTRY + /* Save the parent pointer (skip orig rbp and our return address) */ pushq \added+8*2(%rsp) pushq %rbp movq %rsp, %rbp /* Save the return address (now skip orig rbp, rbp and parent) */ pushq \added+8*3(%rsp) -#else - /* Can't assume that rip is before this (unless added was zero) */ - pushq \added+8(%rsp) -#endif pushq %rbp movq %rsp, %rbp #endif /* CONFIG_FRAME_POINTER */ @@ -113,12 +99,7 @@ EXPORT_SYMBOL(mcount) movq %rdx, RBP(%rsp) /* Copy the parent address into %rsi (second parameter) */ -#ifdef CC_USING_FENTRY movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi -#else - /* %rdx contains original %rbp */ - movq 8(%rdx), %rsi -#endif /* Move RIP to its proper location */ movq MCOUNT_REG_SIZE+\added(%rsp), %rdi @@ -303,15 +284,8 @@ ENTRY(ftrace_graph_caller) /* Saves rbp into %rdx and fills first parameter */ save_mcount_regs -#ifdef CC_USING_FENTRY leaq MCOUNT_REG_SIZE+8(%rsp), %rsi movq $0, %rdx /* No framepointers needed */ -#else - /* Save address of the return address of traced function */ - leaq 8(%rdx), %rsi - /* ftrace does sanity checks against frame pointers */ - movq (%rdx), %rdx -#endif call prepare_ftrace_return restore_mcount_regs From 56e33afd7757d5da2664fb797f2544ce167176be Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 10 May 2019 23:47:50 +0200 Subject: [PATCH 43/44] livepatch: Remove klp_check_compiler_support() The only purpose of klp_check_compiler_support() is to make sure that we are not using ftrace on x86 via mcount (because that's executed only after prologue has already happened, and that's too late for livepatching purposes). Now that mcount is not supported by ftrace any more, there is no need for klp_check_compiler_support() either. Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1905102346100.17054@cbobk.fhfr.pm Reported-by: Linus Torvalds Signed-off-by: Jiri Kosina Signed-off-by: Steven Rostedt (VMware) --- arch/powerpc/include/asm/livepatch.h | 5 ----- arch/s390/include/asm/livepatch.h | 5 ----- arch/x86/include/asm/livepatch.h | 5 ----- kernel/livepatch/core.c | 8 -------- 4 files changed, 23 deletions(-) diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 5070df19d463..c005aee5ea43 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -24,11 +24,6 @@ #include #ifdef CONFIG_LIVEPATCH -static inline int klp_check_compiler_support(void) -{ - return 0; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->nip = ip; diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 672f95b12d40..818612b784cd 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -13,11 +13,6 @@ #include -static inline int klp_check_compiler_support(void) -{ - return 0; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->psw.addr = ip; diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 2f2bdf0662f8..a66f6706c2de 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -24,11 +24,6 @@ #include #include -static inline int klp_check_compiler_support(void) -{ - return 0; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index eb0ee10a1981..112a36ed4a09 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -1220,14 +1220,6 @@ void klp_module_going(struct module *mod) static int __init klp_init(void) { - int ret; - - ret = klp_check_compiler_support(); - if (ret) { - pr_info("Your compiler is too old; turning off.\n"); - return -EINVAL; - } - klp_root_kobj = kobject_create_and_add("livepatch", kernel_kobj); if (!klp_root_kobj) return -ENOMEM; From 693713cbdb3a4bda5a8a678c31f06560bbb14657 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sat, 11 May 2019 08:32:40 -0400 Subject: [PATCH 44/44] x86: Hide the int3_emulate_call/jmp functions from UML User Mode Linux does not have access to the ip or sp fields of the pt_regs, and accessing them causes UML to fail to build. Hide the int3_emulate_jmp() and int3_emulate_call() instructions from UML, as it doesn't need them anyway. Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/text-patching.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 05861cc08787..0bbb07eaed6b 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -39,6 +39,7 @@ extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; +#ifndef CONFIG_UML_X86 static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; @@ -65,6 +66,7 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); } -#endif +#endif /* CONFIG_X86_64 */ +#endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */