From 42b28ac071a1a239d2a48965e9d6be0e061dd103 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Sep 2011 12:33:28 -0300 Subject: [PATCH 01/16] perf hists: Stop using 'self' for struct hists Stop using this python/OOP convention, doesn't really helps. Will do more from time to time till we get it cleaned up in all of /perf. Suggested-by: Thomas Gleixner Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-91i56jwnzq9edhsj9y2y9l3b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 156 ++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 677e1da6bb3e..dd277897ff0b 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -18,56 +18,56 @@ struct callchain_param callchain_param = { .order = ORDER_CALLEE }; -u16 hists__col_len(struct hists *self, enum hist_column col) +u16 hists__col_len(struct hists *hists, enum hist_column col) { - return self->col_len[col]; + return hists->col_len[col]; } -void hists__set_col_len(struct hists *self, enum hist_column col, u16 len) +void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len) { - self->col_len[col] = len; + hists->col_len[col] = len; } -bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len) +bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len) { - if (len > hists__col_len(self, col)) { - hists__set_col_len(self, col, len); + if (len > hists__col_len(hists, col)) { + hists__set_col_len(hists, col, len); return true; } return false; } -static void hists__reset_col_len(struct hists *self) +static void hists__reset_col_len(struct hists *hists) { enum hist_column col; for (col = 0; col < HISTC_NR_COLS; ++col) - hists__set_col_len(self, col, 0); + hists__set_col_len(hists, col, 0); } -static void hists__calc_col_len(struct hists *self, struct hist_entry *h) +static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { u16 len; if (h->ms.sym) - hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen); + hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); else { const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - if (hists__col_len(self, HISTC_DSO) < unresolved_col_width && + if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && !symbol_conf.col_width_list_str && !symbol_conf.field_sep && !symbol_conf.dso_list) - hists__set_col_len(self, HISTC_DSO, + hists__set_col_len(hists, HISTC_DSO, unresolved_col_width); } len = thread__comm_len(h->thread); - if (hists__new_col_len(self, HISTC_COMM, len)) - hists__set_col_len(self, HISTC_THREAD, len + 6); + if (hists__new_col_len(hists, HISTC_COMM, len)) + hists__set_col_len(hists, HISTC_THREAD, len + 6); if (h->ms.map) { len = dso__name_len(h->ms.map->dso); - hists__new_col_len(self, HISTC_DSO, len); + hists__new_col_len(hists, HISTC_DSO, len); } } @@ -113,11 +113,11 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) return self; } -static void hists__inc_nr_entries(struct hists *self, struct hist_entry *h) +static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) { if (!h->filtered) { - hists__calc_col_len(self, h); - ++self->nr_entries; + hists__calc_col_len(hists, h); + ++hists->nr_entries; } } @@ -128,11 +128,11 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -struct hist_entry *__hists__add_entry(struct hists *self, +struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *sym_parent, u64 period) { - struct rb_node **p = &self->entries.rb_node; + struct rb_node **p = &hists->entries.rb_node; struct rb_node *parent = NULL; struct hist_entry *he; struct hist_entry entry = { @@ -172,8 +172,8 @@ struct hist_entry *__hists__add_entry(struct hists *self, if (!he) return NULL; rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &self->entries); - hists__inc_nr_entries(self, he); + rb_insert_color(&he->rb_node, &hists->entries); + hists__inc_nr_entries(hists, he); out: hist_entry__add_cpumode_period(he, al->cpumode, period); return he; @@ -222,7 +222,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *self, +static bool hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he) { @@ -240,8 +240,8 @@ static bool hists__collapse_insert_entry(struct hists *self, if (!cmp) { iter->period += he->period; if (symbol_conf.use_callchain) { - callchain_cursor_reset(&self->callchain_cursor); - callchain_merge(&self->callchain_cursor, iter->callchain, + callchain_cursor_reset(&hists->callchain_cursor); + callchain_merge(&hists->callchain_cursor, iter->callchain, he->callchain); } hist_entry__free(he); @@ -259,7 +259,7 @@ static bool hists__collapse_insert_entry(struct hists *self, return true; } -void hists__collapse_resort(struct hists *self) +void hists__collapse_resort(struct hists *hists) { struct rb_root tmp; struct rb_node *next; @@ -269,20 +269,20 @@ void hists__collapse_resort(struct hists *self) return; tmp = RB_ROOT; - next = rb_first(&self->entries); - self->nr_entries = 0; - hists__reset_col_len(self); + next = rb_first(&hists->entries); + hists->nr_entries = 0; + hists__reset_col_len(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->entries); - if (hists__collapse_insert_entry(self, &tmp, n)) - hists__inc_nr_entries(self, n); + rb_erase(&n->rb_node, &hists->entries); + if (hists__collapse_insert_entry(hists, &tmp, n)) + hists__inc_nr_entries(hists, n); } - self->entries = tmp; + hists->entries = tmp; } /* @@ -315,31 +315,31 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *self) +void hists__output_resort(struct hists *hists) { struct rb_root tmp; struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; - min_callchain_hits = self->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); tmp = RB_ROOT; - next = rb_first(&self->entries); + next = rb_first(&hists->entries); - self->nr_entries = 0; - hists__reset_col_len(self); + hists->nr_entries = 0; + hists__reset_col_len(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->entries); + rb_erase(&n->rb_node, &hists->entries); __hists__insert_output_entry(&tmp, n, min_callchain_hits); - hists__inc_nr_entries(self, n); + hists__inc_nr_entries(hists, n); } - self->entries = tmp; + hists->entries = tmp; } static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -738,7 +738,7 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, left_margin); } -size_t hists__fprintf(struct hists *self, struct hists *pair, +size_t hists__fprintf(struct hists *hists, struct hists *pair, bool show_displacement, FILE *fp) { struct sort_entry *se; @@ -803,15 +803,15 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, width = strlen(se->se_header); if (symbol_conf.col_width_list_str) { if (col_width) { - hists__set_col_len(self, se->se_width_idx, + hists__set_col_len(hists, se->se_width_idx, atoi(col_width)); col_width = strchr(col_width, ','); if (col_width) ++col_width; } } - if (!hists__new_col_len(self, se->se_width_idx, width)) - width = hists__col_len(self, se->se_width_idx); + if (!hists__new_col_len(hists, se->se_width_idx, width)) + width = hists__col_len(hists, se->se_width_idx); fprintf(fp, " %*s", width, se->se_header); } fprintf(fp, "\n"); @@ -834,7 +834,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, continue; fprintf(fp, " "); - width = hists__col_len(self, se->se_width_idx); + width = hists__col_len(hists, se->se_width_idx); if (width == 0) width = strlen(se->se_header); for (i = 0; i < width; i++) @@ -844,7 +844,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, fprintf(fp, "\n#\n"); print_entries: - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (h->filtered) @@ -858,12 +858,12 @@ print_entries: displacement = 0; ++position; } - ret += hist_entry__fprintf(h, self, pair, show_displacement, - displacement, fp, self->stats.total_period); + ret += hist_entry__fprintf(h, hists, pair, show_displacement, + displacement, fp, hists->stats.total_period); if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, self, fp, - self->stats.total_period); + ret += hist_entry__fprintf_callchain(h, hists, fp, + hists->stats.total_period); if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, MAP__FUNCTION, verbose, fp); @@ -879,7 +879,7 @@ print_entries: /* * See hists__fprintf to match the column widths */ -unsigned int hists__sort_list_width(struct hists *self) +unsigned int hists__sort_list_width(struct hists *hists) { struct sort_entry *se; int ret = 9; /* total % */ @@ -898,7 +898,7 @@ unsigned int hists__sort_list_width(struct hists *self) list_for_each_entry(se, &hist_entry__sort_list, list) if (!se->elide) - ret += 2 + hists__col_len(self, se->se_width_idx); + ret += 2 + hists__col_len(hists, se->se_width_idx); if (verbose) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; @@ -906,32 +906,32 @@ unsigned int hists__sort_list_width(struct hists *self) return ret; } -static void hists__remove_entry_filter(struct hists *self, struct hist_entry *h, +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { h->filtered &= ~(1 << filter); if (h->filtered) return; - ++self->nr_entries; + ++hists->nr_entries; if (h->ms.unfolded) - self->nr_entries += h->nr_rows; + hists->nr_entries += h->nr_rows; h->row_offset = 0; - self->stats.total_period += h->period; - self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; + hists->stats.total_period += h->period; + hists->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; - hists__calc_col_len(self, h); + hists__calc_col_len(hists, h); } -void hists__filter_by_dso(struct hists *self, const struct dso *dso) +void hists__filter_by_dso(struct hists *hists, const struct dso *dso) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (symbol_conf.exclude_other && !h->parent) @@ -942,19 +942,19 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso) continue; } - hists__remove_entry_filter(self, h, HIST_FILTER__DSO); + hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); } } -void hists__filter_by_thread(struct hists *self, const struct thread *thread) +void hists__filter_by_thread(struct hists *hists, const struct thread *thread) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (thread != NULL && h->thread != thread) { @@ -962,7 +962,7 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread) continue; } - hists__remove_entry_filter(self, h, HIST_FILTER__THREAD); + hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); } } @@ -976,13 +976,13 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) return symbol__annotate(he->ms.sym, he->ms.map, privsize); } -void hists__inc_nr_events(struct hists *self, u32 type) +void hists__inc_nr_events(struct hists *hists, u32 type) { - ++self->stats.nr_events[0]; - ++self->stats.nr_events[type]; + ++hists->stats.nr_events[0]; + ++hists->stats.nr_events[type]; } -size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) +size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) { int i; size_t ret = 0; @@ -990,7 +990,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; - if (self->stats.nr_events[i] == 0) + if (hists->stats.nr_events[i] == 0) continue; name = perf_event__name(i); @@ -998,7 +998,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) continue; ret += fprintf(fp, "%16s events: %10d\n", name, - self->stats.nr_events[i]); + hists->stats.nr_events[i]); } return ret; From ef9dfe6ec3e409b68e35c05b882d636140bb3fa7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Sep 2011 12:46:11 -0300 Subject: [PATCH 02/16] perf hists: Allow limiting the number of rows and columns in fprintf So that we can reuse hists__fprintf for in the new perf top tool. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-huazw48x05h8r9niz5cf63za@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/util/hist.c | 33 +++++++++++++++++++++++++++------ tools/perf/util/hist.h | 7 ++++--- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index e8219990f8b8..b39f3a1ee7dc 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -162,7 +162,7 @@ static int __cmd_diff(void) hists__match(&session[0]->hists, &session[1]->hists); hists__fprintf(&session[1]->hists, &session[0]->hists, - show_displacement, stdout); + show_displacement, true, 0, 0, stdout); out_delete: for (i = 0; i < 2; ++i) perf_session__delete(session[i]); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3d58334909a5..b125742e5cfd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -232,7 +232,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, const char *evname = event_name(pos); hists__fprintf_nr_sample_events(hists, evname, stdout); - hists__fprintf(hists, NULL, false, stdout); + hists__fprintf(hists, NULL, false, true, 0, 0, stdout); fprintf(stdout, "\n\n"); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index dd277897ff0b..24cca0a7ffa3 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -710,12 +710,16 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, return ret; } -int hist_entry__fprintf(struct hist_entry *self, struct hists *hists, +int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, struct hists *pair_hists, bool show_displacement, long displacement, FILE *fp, u64 session_total) { char bf[512]; - hist_entry__snprintf(self, bf, sizeof(bf), hists, pair_hists, + + if (size == 0 || size > sizeof(bf)) + size = sizeof(bf); + + hist_entry__snprintf(he, bf, size, hists, pair_hists, show_displacement, displacement, true, session_total); return fprintf(fp, "%s\n", bf); @@ -739,7 +743,8 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, } size_t hists__fprintf(struct hists *hists, struct hists *pair, - bool show_displacement, FILE *fp) + bool show_displacement, bool show_header, int max_rows, + int max_cols, FILE *fp) { struct sort_entry *se; struct rb_node *nd; @@ -749,9 +754,13 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, unsigned int width; const char *sep = symbol_conf.field_sep; const char *col_width = symbol_conf.col_width_list_str; + int nr_rows = 0; init_rem_hits(); + if (!show_header) + goto print_entries; + fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); if (symbol_conf.show_nr_samples) { @@ -814,7 +823,10 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, width = hists__col_len(hists, se->se_width_idx); fprintf(fp, " %*s", width, se->se_header); } + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; if (sep) goto print_entries; @@ -841,7 +853,13 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, fprintf(fp, "."); } - fprintf(fp, "\n#\n"); + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + fprintf(fp, "#\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; print_entries: for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { @@ -858,19 +876,22 @@ print_entries: displacement = 0; ++position; } - ret += hist_entry__fprintf(h, hists, pair, show_displacement, + ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, displacement, fp, hists->stats.total_period); if (symbol_conf.use_callchain) ret += hist_entry__fprintf_callchain(h, hists, fp, hists->stats.total_period); + if (max_rows && ++nr_rows >= max_rows) + goto out; + if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, MAP__FUNCTION, verbose, fp); fprintf(fp, "%.10s end\n", graph_dotted_line); } } - +out: free(rem_sq_bracket); return ret; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3beb97c4d822..861ffc31db96 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -57,9 +57,9 @@ struct hist_entry *__hists__add_entry(struct hists *self, struct symbol *parent, u64 period); extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); -int hist_entry__fprintf(struct hist_entry *self, struct hists *hists, +int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, struct hists *pair_hists, bool show_displacement, - long displacement, FILE *fp, u64 total); + long displacement, FILE *fp, u64 session_total); int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, struct hists *hists, struct hists *pair_hists, bool show_displacement, long displacement, @@ -73,7 +73,8 @@ void hists__inc_nr_events(struct hists *self, u32 type); size_t hists__fprintf_nr_events(struct hists *self, FILE *fp); size_t hists__fprintf(struct hists *self, struct hists *pair, - bool show_displacement, FILE *fp); + bool show_displacement, bool show_header, + int max_rows, int max_cols, FILE *fp); int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr); int hist_entry__annotate(struct hist_entry *self, size_t privsize); From 3f2728bdb6a4cad0d18b09d03e2008121c902751 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 16:10:06 -0300 Subject: [PATCH 03/16] perf report: Add option to show total period Just like --show-nr-samples, to help in diagnosing problems in the tools. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-1lr7ejdjfvy2uwy2wkmatcpq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 ++ tools/perf/builtin-report.c | 2 ++ tools/perf/util/hist.c | 19 +++++++++++++++++++ tools/perf/util/symbol.h | 1 + 4 files changed, 24 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 6349b6c0e3ec..4e82c19cbbbe 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -137,6 +137,8 @@ OPTIONS -M:: --disassembler-style=:: Set disassembler style for objdump. +--show-total-period:: Show a column with the sum of periods. + SEE ALSO -------- linkperf:perf-stat[1] diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b125742e5cfd..c1cc7ab6f849 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -486,6 +486,8 @@ static const struct option options[] = { OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, + "Show a column with the sum of periods"), OPT_END() }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 24cca0a7ffa3..32c90865940f 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -664,6 +664,13 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, ret += snprintf(s + ret, size - ret, "%11" PRIu64, nr_events); } + if (symbol_conf.show_total_period) { + if (sep) + ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); + else + ret += snprintf(s + ret, size - ret, " %12" PRIu64, period); + } + if (pair_hists) { char bf[32]; double old_percent = 0, new_percent = 0, diff; @@ -770,6 +777,13 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, fputs(" Samples ", fp); } + if (symbol_conf.show_total_period) { + if (sep) + ret += fprintf(fp, "%cPeriod", *sep); + else + ret += fprintf(fp, " Period "); + } + if (symbol_conf.show_cpu_utilization) { if (sep) { ret += fprintf(fp, "%csys", *sep); @@ -834,6 +848,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, fprintf(fp, "# ........"); if (symbol_conf.show_nr_samples) fprintf(fp, " .........."); + if (symbol_conf.show_total_period) + fprintf(fp, " ............"); if (pair) { fprintf(fp, " .........."); if (show_displacement) @@ -917,6 +933,9 @@ unsigned int hists__sort_list_width(struct hists *hists) if (symbol_conf.show_nr_samples) ret += 11; + if (symbol_conf.show_total_period) + ret += 13; + list_for_each_entry(se, &hist_entry__sort_list, list) if (!se->elide) ret += 2 + hists__col_len(hists, se->se_width_idx); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 7733f0b3cd41..29f8d742e92f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -72,6 +72,7 @@ struct symbol_conf { use_modules, sort_by_name, show_nr_samples, + show_total_period, use_callchain, exclude_other, show_cpu_utilization, From 1980c2ebd7020d82c024b8c4046849b38e78e7da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 17:50:23 -0300 Subject: [PATCH 04/16] perf hists: Threaded addition and sorting of entries By using a mutex just for inserting and rotating two hist_entry rb trees, so that when sorting we can get the last batch of entries created from the ring buffer, merge it with whatever we have processed so far and show the output while new entries are being added. The 'report' tool continues, for now, to do it without threading, but will use this in the future to allow visualization of results in long perf.data sessions while the entries are being processed. The new 'top' tool will be the first user. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9b05atsn0q6m7fqgrug8fk2i@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + tools/perf/util/hist.c | 129 ++++++++++++++++++++++++++++------------ tools/perf/util/hist.h | 9 +++ tools/perf/util/sort.h | 1 + 4 files changed, 101 insertions(+), 39 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e389815078d3..b46f6e4bff3c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -39,6 +39,7 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->idx = idx; evsel->attr = *attr; INIT_LIST_HEAD(&evsel->node); + hists__init(&evsel->hists); } struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 32c90865940f..80fe30d90d72 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -118,6 +118,7 @@ static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) if (!h->filtered) { hists__calc_col_len(hists, h); ++hists->nr_entries; + hists->stats.total_period += h->period; } } @@ -132,7 +133,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *sym_parent, u64 period) { - struct rb_node **p = &hists->entries.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; struct hist_entry entry = { @@ -150,9 +151,13 @@ struct hist_entry *__hists__add_entry(struct hists *hists, }; int cmp; + pthread_mutex_lock(&hists->lock); + + p = &hists->entries_in->rb_node; + while (*p != NULL) { parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); + he = rb_entry(parent, struct hist_entry, rb_node_in); cmp = hist_entry__cmp(&entry, he); @@ -170,12 +175,14 @@ struct hist_entry *__hists__add_entry(struct hists *hists, he = hist_entry__new(&entry); if (!he) - return NULL; - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hists->entries); - hists__inc_nr_entries(hists, he); + goto out_unlock; + + rb_link_node(&he->rb_node_in, parent, p); + rb_insert_color(&he->rb_node_in, hists->entries_in); out: hist_entry__add_cpumode_period(he, al->cpumode, period); +out_unlock: + pthread_mutex_unlock(&hists->lock); return he; } @@ -233,7 +240,7 @@ static bool hists__collapse_insert_entry(struct hists *hists, while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); + iter = rb_entry(parent, struct hist_entry, rb_node_in); cmp = hist_entry__collapse(iter, he); @@ -254,35 +261,57 @@ static bool hists__collapse_insert_entry(struct hists *hists, p = &(*p)->rb_right; } - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + rb_link_node(&he->rb_node_in, parent, p); + rb_insert_color(&he->rb_node_in, root); return true; } +static struct rb_root *hists__get_rotate_entries_in(struct hists *hists) +{ + struct rb_root *root; + + pthread_mutex_lock(&hists->lock); + + root = hists->entries_in; + if (++hists->entries_in > &hists->entries_in_array[1]) + hists->entries_in = &hists->entries_in_array[0]; + + pthread_mutex_unlock(&hists->lock); + + return root; +} + +static void __hists__collapse_resort(struct hists *hists, bool threaded) +{ + struct rb_root *root; + struct rb_node *next; + struct hist_entry *n; + + if (!sort__need_collapse && !threaded) + return; + + root = hists__get_rotate_entries_in(hists); + next = rb_first(root); + hists->stats.total_period = 0; + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node_in); + next = rb_next(&n->rb_node_in); + + rb_erase(&n->rb_node_in, root); + if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) + hists__inc_nr_entries(hists, n); + } +} + void hists__collapse_resort(struct hists *hists) { - struct rb_root tmp; - struct rb_node *next; - struct hist_entry *n; + return __hists__collapse_resort(hists, false); +} - if (!sort__need_collapse) - return; - - tmp = RB_ROOT; - next = rb_first(&hists->entries); - hists->nr_entries = 0; - hists__reset_col_len(hists); - - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &hists->entries); - if (hists__collapse_insert_entry(hists, &tmp, n)) - hists__inc_nr_entries(hists, n); - } - - hists->entries = tmp; +void hists__collapse_resort_threaded(struct hists *hists) +{ + return __hists__collapse_resort(hists, true); } /* @@ -315,31 +344,43 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *hists) +static void __hists__output_resort(struct hists *hists, bool threaded) { - struct rb_root tmp; + struct rb_root *root; struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); - tmp = RB_ROOT; - next = rb_first(&hists->entries); + if (sort__need_collapse || threaded) + root = &hists->entries_collapsed; + else + root = hists->entries_in; + + next = rb_first(root); + hists->entries = RB_ROOT; hists->nr_entries = 0; hists__reset_col_len(hists); while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); + n = rb_entry(next, struct hist_entry, rb_node_in); + next = rb_next(&n->rb_node_in); - rb_erase(&n->rb_node, &hists->entries); - __hists__insert_output_entry(&tmp, n, min_callchain_hits); + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); hists__inc_nr_entries(hists, n); } +} - hists->entries = tmp; +void hists__output_resort(struct hists *hists) +{ + return __hists__output_resort(hists, false); +} + +void hists__output_resort_threaded(struct hists *hists) +{ + return __hists__output_resort(hists, true); } static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -1043,3 +1084,13 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) return ret; } + +void hists__init(struct hists *hists) +{ + memset(hists, 0, sizeof(*hists)); + hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; + hists->entries_in = &hists->entries_in_array[0]; + hists->entries_collapsed = RB_ROOT; + hists->entries = RB_ROOT; + pthread_mutex_init(&hists->lock, NULL); +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 861ffc31db96..d3f976cc7c56 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -2,6 +2,7 @@ #define __PERF_HIST_H #include +#include #include "callchain.h" extern struct callchain_param callchain_param; @@ -43,8 +44,12 @@ enum hist_column { }; struct hists { + struct rb_root entries_in_array[2]; + struct rb_root *entries_in; struct rb_root entries; + struct rb_root entries_collapsed; u64 nr_entries; + pthread_mutex_t lock; struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; @@ -52,6 +57,8 @@ struct hists { struct callchain_cursor callchain_cursor; }; +void hists__init(struct hists *hists); + struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, struct symbol *parent, u64 period); @@ -67,7 +74,9 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, void hist_entry__free(struct hist_entry *); void hists__output_resort(struct hists *self); +void hists__output_resort_threaded(struct hists *hists); void hists__collapse_resort(struct hists *self); +void hists__collapse_resort_threaded(struct hists *hists); void hists__inc_nr_events(struct hists *self, u32 type); size_t hists__fprintf_nr_events(struct hists *self, FILE *fp); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 77d0388ad415..03851e301721 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -45,6 +45,7 @@ extern enum sort_type sort__first_dimension; * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding */ struct hist_entry { + struct rb_node rb_node_in; struct rb_node rb_node; u64 period; u64 period_sys; From 81cce8de9437be9234f651be1f03e596c1b1a79a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 19:11:32 -0300 Subject: [PATCH 05/16] perf browsers: Add live mode to the hists, annotate browsers This allows passing a timer to be run periodically, which will update the hists tree that then gers refreshed on the screen, just like the Live mode (symbol entries, annotation) we already have in 'perf top --tui'. Will be used by the new hist_entry/hists based 'top' tool. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-2r44qd8oe4sagzcgoikl8qzc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 7 +- tools/perf/util/annotate.h | 7 +- tools/perf/util/evlist.c | 6 ++ tools/perf/util/evlist.h | 4 + tools/perf/util/hist.h | 14 +++- tools/perf/util/ui/browsers/annotate.c | 25 +++--- tools/perf/util/ui/browsers/hists.c | 105 ++++++++++++++++--------- tools/perf/util/ui/browsers/top.c | 2 +- 9 files changed, 116 insertions(+), 56 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cf68819f7453..39e3d382b2d8 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -137,7 +137,7 @@ find_next: } if (use_browser > 0) { - key = hist_entry__tui_annotate(he, evidx); + key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0); switch (key) { case KEY_RIGHT: next = rb_next(nd); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c1cc7ab6f849..e7140c6289b8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -327,9 +327,10 @@ static int __cmd_report(void) goto out_delete; } - if (use_browser > 0) - perf_evlist__tui_browse_hists(session->evlist, help); - else + if (use_browser > 0) { + perf_evlist__tui_browse_hists(session->evlist, help, + NULL, NULL, 0); + } else perf_evlist__tty_browse_hists(session->evlist, help); out_delete: diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6ede1286ee71..aafbc7e9a9ba 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -91,13 +91,16 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, #ifdef NO_NEWT_SUPPORT static inline int symbol__tui_annotate(struct symbol *sym __used, struct map *map __used, - int evidx __used, int refresh __used) + int evidx __used, + void(*timer)(void *arg) __used, + void *arg __used, int delay_secs __used) { return 0; } #else int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, - int refresh); + void(*timer)(void *arg), void *arg, + int delay_secs); #endif extern const char *disassembler_style; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 72e9f4886b6d..2f6bc89027da 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -533,3 +533,9 @@ bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) first = list_entry(evlist->entries.next, struct perf_evsel, node); return first->attr.sample_id_all; } + +void perf_evlist__set_selected(struct perf_evlist *evlist, + struct perf_evsel *evsel) +{ + evlist->selected = evsel; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f34915002745..6be71fc57794 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -25,6 +25,7 @@ struct perf_evlist { struct pollfd *pollfd; struct thread_map *threads; struct cpu_map *cpus; + struct perf_evsel *selected; }; struct perf_evsel; @@ -56,6 +57,9 @@ void perf_evlist__munmap(struct perf_evlist *evlist); void perf_evlist__disable(struct perf_evlist *evlist); void perf_evlist__enable(struct perf_evlist *evlist); +void perf_evlist__set_selected(struct perf_evlist *evlist, + struct perf_evsel *evsel); + static inline void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d3f976cc7c56..424f9eb8310c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -100,13 +100,16 @@ struct perf_evlist; #ifdef NO_NEWT_SUPPORT static inline int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used, - const char *help __used) + const char *help __used, void(*timer)(void *arg) __used, void *arg, + int refresh __used) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *self __used, - int evidx __used) + int evidx __used, + void(*timer)(void *arg) __used, + void *arg __used, int delay_secs __used); { return 0; } @@ -114,12 +117,15 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used, #define KEY_RIGHT -2 #else #include -int hist_entry__tui_annotate(struct hist_entry *self, int evidx); +int hist_entry__tui_annotate(struct hist_entry *he, int evidx, + void(*timer)(void *arg), void *arg, int delay_secs); #define KEY_LEFT NEWT_KEY_LEFT #define KEY_RIGHT NEWT_KEY_RIGHT -int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help); +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, + void(*timer)(void *arg), void *arg, + int refresh); #endif unsigned int hists__sort_list_width(struct hists *self); diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 0229723aceb3..76c1d083aa94 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -164,7 +164,8 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, } static int annotate_browser__run(struct annotate_browser *self, int evidx, - int refresh) + void(*timer)(void *arg) __used, void *arg __used, + int delay_secs) { struct rb_node *nd = NULL; struct symbol *sym = self->b.priv; @@ -189,13 +190,13 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, nd = self->curr_hot; - if (refresh != 0) - newtFormSetTimer(self->b.form, refresh); + if (delay_secs != 0) + newtFormSetTimer(self->b.form, delay_secs * 1000); while (1) { key = ui_browser__run(&self->b); - if (refresh != 0) { + if (delay_secs != 0) { annotate_browser__calc_percent(self, evidx); /* * Current line focus got out of the list of most active @@ -212,7 +213,10 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, * FIXME we need to check if it was * es.reason == NEWT_EXIT_TIMER */ - if (refresh != 0) + if (timer != NULL) + timer(arg); + + if (delay_secs != 0) symbol__annotate_decay_histogram(sym, evidx); continue; case NEWT_KEY_TAB: @@ -246,13 +250,16 @@ out: return key; } -int hist_entry__tui_annotate(struct hist_entry *he, int evidx) +int hist_entry__tui_annotate(struct hist_entry *he, int evidx, + void(*timer)(void *arg), void *arg, int delay_secs) { - return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, 0); + return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, + timer, arg, delay_secs); } int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, - int refresh) + void(*timer)(void *arg), void *arg, + int delay_secs) { struct objdump_line *pos, *n; struct annotation *notes; @@ -293,7 +300,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ - ret = annotate_browser__run(&browser, evidx, refresh); + ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs); list_for_each_entry_safe(pos, n, ¬es->src->source, node) { list_del(&pos->node); objdump_line__free(pos); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 5d767c622dfc..6244d19bd1f2 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -24,8 +24,14 @@ struct hist_browser { struct hists *hists; struct hist_entry *he_selection; struct map_symbol *selection; + const struct thread *thread_filter; + const struct dso *dso_filter; }; +static int hists__browser_title(struct hists *self, char *bf, size_t size, + const char *ev_name, const struct dso *dso, + const struct thread *thread); + static void hist_browser__refresh_dimensions(struct hist_browser *self) { /* 3 == +/- toggle symbol before actual hist_entry rendering */ @@ -290,9 +296,12 @@ static void hist_browser__set_folding(struct hist_browser *self, bool unfold) ui_browser__reset_index(&self->b); } -static int hist_browser__run(struct hist_browser *self, const char *title) +static int hist_browser__run(struct hist_browser *self, const char *ev_name, + void(*timer)(void *arg), void *arg, int delay_secs) { int key; + int delay_msecs = delay_secs * 1000; + char title[160]; int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0, }; @@ -301,17 +310,30 @@ static int hist_browser__run(struct hist_browser *self, const char *title) self->b.nr_entries = self->hists->nr_entries; hist_browser__refresh_dimensions(self); + hists__browser_title(self->hists, title, sizeof(title), ev_name, + self->dso_filter, self->thread_filter); if (ui_browser__show(&self->b, title, "Press '?' for help on key bindings") < 0) return -1; + if (timer != NULL) + newtFormSetTimer(self->b.form, delay_msecs); + ui_browser__add_exit_keys(&self->b, exit_keys); while (1) { key = ui_browser__run(&self->b); switch (key) { + case -1: + /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */ + timer(arg); + hists__browser_title(self->hists, title, sizeof(title), + ev_name, self->dso_filter, + self->thread_filter); + ui_browser__show_title(&self->b, title); + continue; case 'D': { /* Debug */ static int seq; struct hist_entry *h = rb_entry(self->b.top, @@ -805,14 +827,13 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, static int perf_evsel__hists_browse(struct perf_evsel *evsel, const char *helpline, const char *ev_name, - bool left_exits) + bool left_exits, + void(*timer)(void *arg), void *arg, + int delay_secs) { struct hists *self = &evsel->hists; struct hist_browser *browser = hist_browser__new(self); struct pstack *fstack; - const struct thread *thread_filter = NULL; - const struct dso *dso_filter = NULL; - char msg[160]; int key = -1; if (browser == NULL) @@ -824,8 +845,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, ui_helpline__push(helpline); - hists__browser_title(self, msg, sizeof(msg), ev_name, - dso_filter, thread_filter); while (1) { const struct thread *thread = NULL; const struct dso *dso = NULL; @@ -834,7 +853,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, annotate = -2, zoom_dso = -2, zoom_thread = -2, browse_map = -2; - key = hist_browser__run(browser, msg); + key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); @@ -889,9 +908,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, continue; } top = pstack__pop(fstack); - if (top == &dso_filter) + if (top == &browser->dso_filter) goto zoom_out_dso; - if (top == &thread_filter) + if (top == &browser->thread_filter) goto zoom_out_thread; continue; } @@ -913,14 +932,14 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, if (thread != NULL && asprintf(&options[nr_options], "Zoom %s %s(%d) thread", - (thread_filter ? "out of" : "into"), + (browser->thread_filter ? "out of" : "into"), (thread->comm_set ? thread->comm : ""), thread->pid) > 0) zoom_thread = nr_options++; if (dso != NULL && asprintf(&options[nr_options], "Zoom %s %s DSO", - (dso_filter ? "out of" : "into"), + (browser->dso_filter ? "out of" : "into"), (dso->kernel ? "the Kernel" : dso->short_name)) > 0) zoom_dso = nr_options++; @@ -949,45 +968,42 @@ do_annotate: if (he == NULL) continue; - hist_entry__tui_annotate(he, evsel->idx); + hist_entry__tui_annotate(he, evsel->idx, + timer, arg, delay_secs); } else if (choice == browse_map) map__browse(browser->selection->map); else if (choice == zoom_dso) { zoom_dso: - if (dso_filter) { - pstack__remove(fstack, &dso_filter); + if (browser->dso_filter) { + pstack__remove(fstack, &browser->dso_filter); zoom_out_dso: ui_helpline__pop(); - dso_filter = NULL; + browser->dso_filter = NULL; } else { if (dso == NULL) continue; ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", dso->kernel ? "the Kernel" : dso->short_name); - dso_filter = dso; - pstack__push(fstack, &dso_filter); + browser->dso_filter = dso; + pstack__push(fstack, &browser->dso_filter); } - hists__filter_by_dso(self, dso_filter); - hists__browser_title(self, msg, sizeof(msg), ev_name, - dso_filter, thread_filter); + hists__filter_by_dso(self, browser->dso_filter); hist_browser__reset(browser); } else if (choice == zoom_thread) { zoom_thread: - if (thread_filter) { - pstack__remove(fstack, &thread_filter); + if (browser->thread_filter) { + pstack__remove(fstack, &browser->thread_filter); zoom_out_thread: ui_helpline__pop(); - thread_filter = NULL; + browser->thread_filter = NULL; } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread->comm : "", thread->pid); - thread_filter = thread; - pstack__push(fstack, &thread_filter); + browser->thread_filter = thread; + pstack__push(fstack, &browser->thread_filter); } - hists__filter_by_thread(self, thread_filter); - hists__browser_title(self, msg, sizeof(msg), ev_name, - dso_filter, thread_filter); + hists__filter_by_thread(self, browser->thread_filter); hist_browser__reset(browser); } } @@ -1026,9 +1042,11 @@ static void perf_evsel_menu__write(struct ui_browser *browser, menu->selection = evsel; } -static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help) +static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help, + void(*timer)(void *arg), void *arg, int delay_secs) { int exit_keys[] = { NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, }; + int delay_msecs = delay_secs * 1000; struct perf_evlist *evlist = menu->b.priv; struct perf_evsel *pos; const char *ev_name, *title = "Available samples"; @@ -1038,20 +1056,29 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help) "ESC: exit, ENTER|->: Browse histograms") < 0) return -1; + if (timer != NULL) + newtFormSetTimer(menu->b.form, delay_msecs); + ui_browser__add_exit_keys(&menu->b, exit_keys); while (1) { key = ui_browser__run(&menu->b); switch (key) { + case -1: + /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */ + timer(arg); + continue; case NEWT_KEY_RIGHT: case NEWT_KEY_ENTER: if (!menu->selection) continue; pos = menu->selection; + perf_evlist__set_selected(evlist, pos); browse_hists: ev_name = event_name(pos); - key = perf_evsel__hists_browse(pos, help, ev_name, true); + key = perf_evsel__hists_browse(pos, help, ev_name, true, + timer, arg, delay_secs); ui_browser__show_title(&menu->b, title); break; case NEWT_KEY_LEFT: @@ -1091,7 +1118,9 @@ out: } static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, - const char *help) + const char *help, + void(*timer)(void *arg), void *arg, + int delay_secs) { struct perf_evsel *pos; struct perf_evsel_menu menu = { @@ -1121,18 +1150,22 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, pos->name = strdup(ev_name); } - return perf_evsel_menu__run(&menu, help); + return perf_evsel_menu__run(&menu, help, timer, arg, delay_secs); } -int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help) +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, + void(*timer)(void *arg), void *arg, + int delay_secs) { if (evlist->nr_entries == 1) { struct perf_evsel *first = list_entry(evlist->entries.next, struct perf_evsel, node); const char *ev_name = event_name(first); - return perf_evsel__hists_browse(first, help, ev_name, false); + return perf_evsel__hists_browse(first, help, ev_name, false, + timer, arg, delay_secs); } - return __perf_evlist__tui_browse_hists(evlist, help); + return __perf_evlist__tui_browse_hists(evlist, help, + timer, arg, delay_secs); } diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c index 9b6b43b32ac8..d43875b2356f 100644 --- a/tools/perf/util/ui/browsers/top.c +++ b/tools/perf/util/ui/browsers/top.c @@ -142,7 +142,7 @@ static void perf_top_browser__annotate(struct perf_top_browser *browser) pthread_mutex_unlock(¬es->lock); do_annotation: - symbol__tui_annotate(sym, syme->map, 0, top->delay_secs * 1000); + symbol__tui_annotate(sym, syme->map, 0, NULL, NULL, top->delay_secs * 1000); } static void perf_top_browser__warn_lost(struct perf_top_browser *browser) From ab81f3fd350c510730adb1ca40ef55c2b2952121 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 19:16:15 -0300 Subject: [PATCH 06/16] perf top: Reuse the 'report' hist_entry/hists classes This actually fixes several problems we had in the old 'perf top': 1. Unresolved symbols not show, limitation that came from the old "KernelTop" codebase, to solve it we would need to do changes that would make sym_entry have most of the hist_entry fields. 2. It was using the number of samples, not the sum of sample->period. And brings the --sort code that allows us to have all the views in 'perf report', for instance: [root@emilia ~]# perf top --sort dso PerfTop: 5903 irqs/sec kernel:77.5% exact: 0.0% [1000Hz cycles], (all, 8 CPUs) ------------------------------------------------------------------------------ 31.59% libcrypto.so.1.0.0 21.55% [kernel] 18.57% libpython2.6.so.1.0 7.04% libc-2.12.so 6.99% _backend_agg.so 4.72% sshd 1.48% multiarray.so 1.39% libfreetype.so.6.3.22 1.37% perf 0.71% libgobject-2.0.so.0.2200.5 0.53% [tg3] 0.48% libglib-2.0.so.0.2200.5 0.44% libstdc++.so.6.0.13 0.40% libcairo.so.2.10800.8 0.38% libm-2.12.so 0.34% umath.so 0.30% libgdk-x11-2.0.so.0.1800.9 0.22% libpthread-2.12.so 0.20% libgtk-x11-2.0.so.0.1800.9 0.20% librt-2.12.so 0.15% _path.so 0.13% libpango-1.0.so.0.2800.1 0.11% libatlas.so.3.0 0.09% ft2font.so 0.09% libpangoft2-1.0.so.0.2800.1 0.08% libX11.so.6.3.0 0.07% [vdso] 0.06% cyclictest ^C All the filter lists can be used as well: --dsos, --comms, --symbols, etc. The 'perf report' TUI is also reused, being possible to apply all the zoom operations, do annotation, etc. This change will allow multiple simplifications in the symbol system as well, that will be detailed in upcoming changesets. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-xzaaldxq7zhqrrxdxjifk1mh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 23 ++- tools/perf/Makefile | 4 - tools/perf/builtin-top.c | 282 +++++++++++--------------- tools/perf/util/hist.c | 50 +++++ tools/perf/util/hist.h | 3 + tools/perf/util/top.c | 141 +------------ tools/perf/util/top.h | 36 +--- tools/perf/util/ui/browsers/top.c | 236 --------------------- 8 files changed, 196 insertions(+), 579 deletions(-) delete mode 100644 tools/perf/util/ui/browsers/top.c diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f6eb1cdafb77..d146ba33d691 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -106,6 +106,26 @@ Default is to monitor all CPUS. --zero:: Zero history across display updates. +-s:: +--sort:: + Sort by key(s): pid, comm, dso, symbol, parent + +-n:: +--show-nr-samples:: + Show a column with the number of samples. + +--show-total-period:: + Show a column with the sum of periods. + +--dsos:: + Only consider symbols in these dsos. + +--comms:: + Only consider symbols in these comms. + +--symbols:: + Only consider these symbols. + INTERACTIVE PROMPTING KEYS -------------------------- @@ -130,9 +150,6 @@ INTERACTIVE PROMPTING KEYS [S]:: Stop annotation, return to full profile display. -[w]:: - Toggle between weighted sum and individual count[E]r profile. - [z]:: Toggle event count zeroing across display updates. diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e9d5c271db69..37fe93019bc6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -466,7 +466,6 @@ else LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o - LIB_OBJS += $(OUTPUT)util/ui/browsers/top.o LIB_OBJS += $(OUTPUT)util/ui/helpline.o LIB_OBJS += $(OUTPUT)util/ui/progress.o LIB_OBJS += $(OUTPUT)util/ui/util.o @@ -729,9 +728,6 @@ $(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< -$(OUTPUT)util/ui/browsers/top.o: util/ui/browsers/top.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< - $(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5ede7d7c9239..2cf5e50a6997 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -5,6 +5,7 @@ * any workload, CPU or specific PID. * * Copyright (C) 2008, Red Hat Inc, Ingo Molnar + * 2011, Red Hat Inc, Arnaldo Carvalho de Melo * * Improvements and fixes by: * @@ -36,6 +37,7 @@ #include "util/parse-events.h" #include "util/cpumap.h" #include "util/xyarray.h" +#include "util/sort.h" #include "util/debug.h" @@ -65,12 +67,8 @@ static struct perf_top top = { .count_filter = 5, .delay_secs = 2, - .display_weighted = -1, .target_pid = -1, .target_tid = -1, - .active_symbols = LIST_HEAD_INIT(top.active_symbols), - .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER, - .active_symbols_cond = PTHREAD_COND_INITIALIZER, .freq = 1000, /* 1 KHz */ }; @@ -85,7 +83,6 @@ static bool vmlinux_warned; static bool inherit = false; static int realtime_prio = 0; static bool group = false; -static unsigned int page_size; static unsigned int mmap_pages = 128; static bool dump_symtab = false; @@ -93,7 +90,6 @@ static bool dump_symtab = false; static struct winsize winsize; static const char *sym_filter = NULL; -struct sym_entry *sym_filter_entry_sched = NULL; static int sym_pcnt_filter = 5; /* @@ -136,18 +132,18 @@ static void sig_winch_handler(int sig __used) update_print_entries(&winsize); } -static int parse_source(struct sym_entry *syme) +static int parse_source(struct hist_entry *he) { struct symbol *sym; struct annotation *notes; struct map *map; int err = -1; - if (!syme) + if (!he || !he->ms.sym) return -1; - sym = sym_entry__symbol(syme); - map = syme->map; + sym = he->ms.sym; + map = he->ms.map; /* * We can't annotate with just /proc/kallsyms @@ -175,53 +171,62 @@ static int parse_source(struct sym_entry *syme) return err; } - err = symbol__annotate(sym, syme->map, 0); + err = symbol__annotate(sym, map, 0); if (err == 0) { out_assign: - top.sym_filter_entry = syme; + top.sym_filter_entry = he; } pthread_mutex_unlock(¬es->lock); return err; } -static void __zero_source_counters(struct sym_entry *syme) +static void __zero_source_counters(struct hist_entry *he) { - struct symbol *sym = sym_entry__symbol(syme); + struct symbol *sym = he->ms.sym; symbol__annotate_zero_histograms(sym); } -static void record_precise_ip(struct sym_entry *syme, struct map *map, - int counter, u64 ip) +static void record_precise_ip(struct hist_entry *he, int counter, u64 ip) { struct annotation *notes; struct symbol *sym; - if (syme != top.sym_filter_entry) + if (he == NULL || he->ms.sym == NULL || + (he != top.sym_filter_entry && use_browser != 1)) return; - sym = sym_entry__symbol(syme); + sym = he->ms.sym; notes = symbol__annotation(sym); if (pthread_mutex_trylock(¬es->lock)) return; - ip = map->map_ip(map, ip); - symbol__inc_addr_samples(sym, map, counter, ip); + if (notes->src == NULL && + symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { + pthread_mutex_unlock(¬es->lock); + pr_err("Not enough memory for annotating '%s' symbol!\n", + sym->name); + sleep(1); + return; + } + + ip = he->ms.map->map_ip(he->ms.map, ip); + symbol__inc_addr_samples(sym, he->ms.map, counter, ip); pthread_mutex_unlock(¬es->lock); } -static void show_details(struct sym_entry *syme) +static void show_details(struct hist_entry *he) { struct annotation *notes; struct symbol *symbol; int more; - if (!syme) + if (!he) return; - symbol = sym_entry__symbol(syme); + symbol = he->ms.sym; notes = symbol__annotation(symbol); pthread_mutex_lock(¬es->lock); @@ -232,7 +237,7 @@ static void show_details(struct sym_entry *syme) printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); - more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx, + more = symbol__annotate_printf(symbol, he->ms.map, top.sym_evsel->idx, 0, sym_pcnt_filter, top.print_entries, 4); if (top.zero) symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx); @@ -246,21 +251,28 @@ out_unlock: static const char CONSOLE_CLEAR[] = ""; -static void __list_insert_active_sym(struct sym_entry *syme) +static struct hist_entry * + perf_session__add_hist_entry(struct perf_session *session, + struct addr_location *al, + struct perf_sample *sample, + struct perf_evsel *evsel) { - list_add(&syme->node, &top.active_symbols); + struct hist_entry *he; + + he = __hists__add_entry(&evsel->hists, al, NULL, sample->period); + if (he == NULL) + return NULL; + + session->hists.stats.total_period += sample->period; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + return he; } static void print_sym_table(void) { char bf[160]; int printed = 0; - struct rb_node *nd; - struct sym_entry *syme; - struct rb_root tmp = RB_ROOT; const int win_width = winsize.ws_col - 1; - int sym_width, dso_width, dso_short_width; - float sum_ksamples = perf_top__decay_samples(&top, &tmp); puts(CONSOLE_CLEAR); @@ -276,6 +288,7 @@ static void print_sym_table(void) color_fprintf(stdout, PERF_COLOR_RED, "WARNING:"); printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n", top.total_lost_warned); + ++printed; } if (top.sym_filter_entry) { @@ -283,58 +296,13 @@ static void print_sym_table(void) return; } - perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width, - &sym_width); - - if (sym_width + dso_width > winsize.ws_col - 29) { - dso_width = dso_short_width; - if (sym_width + dso_width > winsize.ws_col - 29) - sym_width = winsize.ws_col - dso_width - 29; - } + hists__collapse_resort_threaded(&top.sym_evsel->hists); + hists__output_resort_threaded(&top.sym_evsel->hists); + hists__decay_entries(&top.sym_evsel->hists); + hists__output_recalc_col_len(&top.sym_evsel->hists, winsize.ws_row - 3); putchar('\n'); - if (top.evlist->nr_entries == 1) - printf(" samples pcnt"); - else - printf(" weight samples pcnt"); - - if (verbose) - printf(" RIP "); - printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); - printf(" %s _______ _____", - top.evlist->nr_entries == 1 ? " " : "______"); - if (verbose) - printf(" ________________"); - printf(" %-*.*s", sym_width, sym_width, graph_line); - printf(" %-*.*s", dso_width, dso_width, graph_line); - puts("\n"); - - for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { - struct symbol *sym; - double pcnt; - - syme = rb_entry(nd, struct sym_entry, rb_node); - sym = sym_entry__symbol(syme); - if (++printed > top.print_entries || - (int)syme->snap_count < top.count_filter) - continue; - - pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / - sum_ksamples)); - - if (top.evlist->nr_entries == 1 || !top.display_weighted) - printf("%20.2f ", syme->weight); - else - printf("%9.1f %10ld ", syme->weight, syme->snap_count); - - percent_color_fprintf(stdout, "%4.1f%%", pcnt); - if (verbose) - printf(" %016" PRIx64, sym->start); - printf(" %-*.*s", sym_width, sym_width, sym->name); - printf(" %-*.*s\n", dso_width, dso_width, - dso_width >= syme->map->dso->long_name_len ? - syme->map->dso->long_name : - syme->map->dso->short_name); - } + hists__fprintf(&top.sym_evsel->hists, NULL, false, false, + winsize.ws_row - 4 - printed, win_width, stdout); } static void prompt_integer(int *target, const char *msg) @@ -372,10 +340,11 @@ static void prompt_percent(int *target, const char *msg) *target = tmp; } -static void prompt_symbol(struct sym_entry **target, const char *msg) +static void prompt_symbol(struct hist_entry **target, const char *msg) { char *buf = malloc(0), *p; - struct sym_entry *syme = *target, *n, *found = NULL; + struct hist_entry *syme = *target, *n, *found = NULL; + struct rb_node *next; size_t dummy = 0; /* zero counters of active symbol */ @@ -392,17 +361,14 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) if (p) *p = 0; - pthread_mutex_lock(&top.active_symbols_lock); - syme = list_entry(top.active_symbols.next, struct sym_entry, node); - pthread_mutex_unlock(&top.active_symbols_lock); - - list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) { - struct symbol *sym = sym_entry__symbol(syme); - - if (!strcmp(buf, sym->name)) { - found = syme; + next = rb_first(&top.sym_evsel->hists.entries); + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { + found = n; break; } + next = rb_next(&n->rb_node); } if (!found) { @@ -421,7 +387,7 @@ static void print_mapped_keys(void) char *name = NULL; if (top.sym_filter_entry) { - struct symbol *sym = sym_entry__symbol(top.sym_filter_entry); + struct symbol *sym = top.sym_filter_entry->ms.sym; name = sym->name; } @@ -438,9 +404,6 @@ static void print_mapped_keys(void) fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); - if (top.evlist->nr_entries > 1) - fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0); - fprintf(stdout, "\t[K] hide kernel_symbols symbols. \t(%s)\n", top.hide_kernel_symbols ? "yes" : "no"); @@ -467,8 +430,6 @@ static int key_mapped(int c) case 'S': return 1; case 'E': - case 'w': - return top.evlist->nr_entries > 1 ? 1 : 0; default: break; } @@ -561,7 +522,7 @@ static void handle_keypress(int c) if (!top.sym_filter_entry) break; else { - struct sym_entry *syme = top.sym_filter_entry; + struct hist_entry *syme = top.sym_filter_entry; top.sym_filter_entry = NULL; __zero_source_counters(syme); @@ -570,9 +531,6 @@ static void handle_keypress(int c) case 'U': top.hide_user_symbols = !top.hide_user_symbols; break; - case 'w': - top.display_weighted = ~top.display_weighted; - break; case 'z': top.zero = !top.zero; break; @@ -581,19 +539,29 @@ static void handle_keypress(int c) } } +static void perf_top__sort_new_samples(void *arg) +{ + struct perf_top *t = arg; + perf_top__reset_sample_counters(t); + + if (t->evlist->selected != NULL) + t->sym_evsel = t->evlist->selected; + + hists__collapse_resort_threaded(&t->sym_evsel->hists); + hists__output_resort_threaded(&t->sym_evsel->hists); + hists__decay_entries(&t->sym_evsel->hists); + hists__output_recalc_col_len(&t->sym_evsel->hists, winsize.ws_row - 3); +} + static void *display_thread_tui(void *arg __used) { - int err = 0; - pthread_mutex_lock(&top.active_symbols_lock); - while (list_empty(&top.active_symbols)) { - err = pthread_cond_wait(&top.active_symbols_cond, - &top.active_symbols_lock); - if (err) - break; - } - pthread_mutex_unlock(&top.active_symbols_lock); - if (!err) - perf_top__tui_browser(&top); + const char *help = "For a higher level overview, try: perf top --sort comm,dso"; + + perf_top__sort_new_samples(&top); + perf_evlist__tui_browse_hists(top.evlist, help, + perf_top__sort_new_samples, + &top, top.delay_secs); + exit_browser(0); exit(0); return NULL; @@ -645,9 +613,8 @@ static const char *skip_symbols[] = { NULL }; -static int symbol_filter(struct map *map, struct symbol *sym) +static int symbol_filter(struct map *map __used, struct symbol *sym) { - struct sym_entry *syme; const char *name = sym->name; int i; @@ -667,16 +634,6 @@ static int symbol_filter(struct map *map, struct symbol *sym) strstr(name, "_text_end")) return 1; - syme = symbol__priv(sym); - syme->map = map; - symbol__annotate_init(map, sym); - - if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { - /* schedule initial sym_filter_entry setup */ - sym_filter_entry_sched = syme; - sym_filter = NULL; - } - for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { sym->ignore = true; @@ -692,7 +649,6 @@ static void perf_event__process_sample(const union perf_event *event, struct perf_session *session) { u64 ip = event->ip.ip; - struct sym_entry *syme; struct addr_location al; struct machine *machine; u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; @@ -783,46 +739,25 @@ static void perf_event__process_sample(const union perf_event *event, sleep(5); vmlinux_warned = true; } - - return; } - /* let's see, whether we need to install initial sym_filter_entry */ - if (sym_filter_entry_sched) { - top.sym_filter_entry = sym_filter_entry_sched; - sym_filter_entry_sched = NULL; - if (parse_source(top.sym_filter_entry) < 0) { - struct symbol *sym = sym_entry__symbol(top.sym_filter_entry); - - pr_err("Can't annotate %s", sym->name); - if (top.sym_filter_entry->map->dso->symtab_type == SYMTAB__KALLSYMS) { - pr_err(": No vmlinux file was found in the path:\n"); - machine__fprintf_vmlinux_path(machine, stderr); - } else - pr_err(".\n"); - exit(1); - } - } - - syme = symbol__priv(al.sym); - if (!al.sym->ignore) { + if (al.sym == NULL || !al.sym->ignore) { struct perf_evsel *evsel; + struct hist_entry *he; evsel = perf_evlist__id2evsel(top.evlist, sample->id); assert(evsel != NULL); - syme->count[evsel->idx]++; - record_precise_ip(syme, al.map, evsel->idx, ip); - pthread_mutex_lock(&top.active_symbols_lock); - if (list_empty(&syme->node) || !syme->node.next) { - static bool first = true; - __list_insert_active_sym(syme); - if (first) { - pthread_cond_broadcast(&top.active_symbols_cond); - first = false; - } + + he = perf_session__add_hist_entry(session, &al, sample, evsel); + if (he == NULL) { + pr_err("Problem incrementing symbol period, skipping event\n"); + return; } - pthread_mutex_unlock(&top.active_symbols_lock); + + record_precise_ip(he, evsel->idx, ip); } + + return; } static void perf_session__mmap_read_idx(struct perf_session *self, int idx) @@ -874,6 +809,7 @@ static void start_counters(struct perf_evlist *evlist) } attr->mmap = 1; + attr->comm = 1; attr->inherit = inherit; try_again: if (perf_evsel__open(counter, top.evlist->cpus, @@ -1019,7 +955,7 @@ static const struct option options[] = { "put the counters into a counter group"), OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), - OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", + OPT_STRING(0, "sym-annotate", &sym_filter, "symbol name", "symbol to annotate"), OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"), @@ -1033,6 +969,18 @@ static const struct option options[] = { OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_STRING('s', "sort", &sort_order, "key[,key2...]", + "sort by key(s): pid, comm, dso, symbol, parent"), + OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, + "Show a column with the number of samples"), + OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, + "Show a column with the sum of periods"), + OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", + "only consider symbols in these dsos"), + OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]", + "only consider symbols in these comms"), + OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", + "only consider these symbols"), OPT_END() }; @@ -1045,12 +993,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (top.evlist == NULL) return -ENOMEM; - page_size = sysconf(_SC_PAGE_SIZE); + symbol_conf.exclude_other = false; argc = parse_options(argc, argv, options, top_usage, 0); if (argc) usage_with_options(top_usage, options); + if (sort_order == default_sort_order) + sort_order = "dso,symbol"; + + setup_sorting(top_usage, options); + /* * XXX For now start disabled, only using TUI if explicitely asked for. * Change that when handle_keys equivalent gets written, live annotation @@ -1119,13 +1072,16 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); - symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) + - (top.evlist->nr_entries + 1) * sizeof(unsigned long)); + symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); if (symbol__init() < 0) return -1; + sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); + sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); + sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + get_term_dimensions(&winsize); if (top.print_entries == 0) { update_print_entries(&winsize); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 80fe30d90d72..87ef5c7797de 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -92,6 +92,41 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self, } } +static void hist_entry__decay(struct hist_entry *he) +{ + he->period = (he->period * 7) / 8; + he->nr_events = (he->nr_events * 7) / 8; +} + +static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) +{ + hists->stats.total_period -= he->period; + hist_entry__decay(he); + hists->stats.total_period += he->period; + return he->period == 0; +} + +void hists__decay_entries(struct hists *hists) +{ + struct rb_node *next = rb_first(&hists->entries); + struct hist_entry *n; + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + + if (hists__decay_entry(hists, n)) { + rb_erase(&n->rb_node, &hists->entries); + + if (sort__need_collapse) + rb_erase(&n->rb_node_in, &hists->entries_collapsed); + + hist_entry__free(n); + --hists->nr_entries; + } + } +} + /* * histogram, sorted on item, collects periods */ @@ -635,6 +670,21 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, return ret; } +void hists__output_recalc_col_len(struct hists *hists, int max_rows) +{ + struct rb_node *next = rb_first(&hists->entries); + struct hist_entry *n; + int row = 0; + + hists__reset_col_len(hists); + + while (next && row++ < max_rows) { + n = rb_entry(next, struct hist_entry, rb_node); + hists__calc_col_len(hists, n); + next = rb_next(&n->rb_node); + } +} + int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, struct hists *hists, struct hists *pair_hists, bool show_displacement, long displacement, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 424f9eb8310c..f87677bfaff9 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -78,6 +78,9 @@ void hists__output_resort_threaded(struct hists *hists); void hists__collapse_resort(struct hists *self); void hists__collapse_resort_threaded(struct hists *hists); +void hists__decay_entries(struct hists *hists); +void hists__output_recalc_col_len(struct hists *hists, int max_rows); + void hists__inc_nr_events(struct hists *self, u32 type); size_t hists__fprintf_nr_events(struct hists *self, FILE *fp); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index a11f60735a18..500471dffa4f 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -15,52 +15,6 @@ #include "top.h" #include -/* - * Ordering weight: count-1 * count-2 * ... / count-n - */ -static double sym_weight(const struct sym_entry *sym, struct perf_top *top) -{ - double weight = sym->snap_count; - int counter; - - if (!top->display_weighted) - return weight; - - for (counter = 1; counter < top->evlist->nr_entries - 1; counter++) - weight *= sym->count[counter]; - - weight /= (sym->count[counter] + 1); - - return weight; -} - -static void perf_top__remove_active_sym(struct perf_top *top, struct sym_entry *syme) -{ - pthread_mutex_lock(&top->active_symbols_lock); - list_del_init(&syme->node); - pthread_mutex_unlock(&top->active_symbols_lock); -} - -static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) -{ - struct rb_node **p = &tree->rb_node; - struct rb_node *parent = NULL; - struct sym_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct sym_entry, rb_node); - - if (se->weight > iter->weight) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&se->rb_node, parent, p); - rb_insert_color(&se->rb_node, tree); -} - #define SNPRINTF(buf, size, fmt, args...) \ ({ \ size_t r = snprintf(buf, size, fmt, ## args); \ @@ -69,7 +23,6 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) { - struct perf_evsel *counter; float samples_per_sec = top->samples / top->delay_secs; float ksamples_per_sec = top->kernel_samples / top->delay_secs; float esamples_percent = (100.0 * top->exact_samples) / top->samples; @@ -104,7 +57,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) esamples_percent); } - if (top->evlist->nr_entries == 1 || !top->display_weighted) { + if (top->evlist->nr_entries == 1) { struct perf_evsel *first; first = list_entry(top->evlist->entries.next, struct perf_evsel, node); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", @@ -112,27 +65,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) top->freq ? "Hz" : ""); } - if (!top->display_weighted) { - ret += SNPRINTF(bf + ret, size - ret, "%s", - event_name(top->sym_evsel)); - } else { - /* - * Don't let events eat all the space. Leaving 30 bytes - * for the rest should be enough. - */ - size_t last_pos = size - 30; - - list_for_each_entry(counter, &top->evlist->entries, node) { - ret += SNPRINTF(bf + ret, size - ret, "%s%s", - counter->idx ? "/" : "", - event_name(counter)); - if (ret > last_pos) { - sprintf(bf + last_pos - 3, ".."); - ret = last_pos - 1; - break; - } - } - } + ret += SNPRINTF(bf + ret, size - ret, "%s", event_name(top->sym_evsel)); ret += SNPRINTF(bf + ret, size - ret, "], "); @@ -166,73 +99,3 @@ void perf_top__reset_sample_counters(struct perf_top *top) top->exact_samples = top->guest_kernel_samples = top->guest_us_samples = 0; } - -float perf_top__decay_samples(struct perf_top *top, struct rb_root *root) -{ - struct sym_entry *syme, *n; - float sum_ksamples = 0.0; - int snap = !top->display_weighted ? top->sym_evsel->idx : 0, j; - - /* Sort the active symbols */ - pthread_mutex_lock(&top->active_symbols_lock); - syme = list_entry(top->active_symbols.next, struct sym_entry, node); - pthread_mutex_unlock(&top->active_symbols_lock); - - top->rb_entries = 0; - list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) { - syme->snap_count = syme->count[snap]; - if (syme->snap_count != 0) { - - if ((top->hide_user_symbols && - syme->map->dso->kernel == DSO_TYPE_USER) || - (top->hide_kernel_symbols && - syme->map->dso->kernel == DSO_TYPE_KERNEL)) { - perf_top__remove_active_sym(top, syme); - continue; - } - syme->weight = sym_weight(syme, top); - - if ((int)syme->snap_count >= top->count_filter) { - rb_insert_active_sym(root, syme); - ++top->rb_entries; - } - sum_ksamples += syme->snap_count; - - for (j = 0; j < top->evlist->nr_entries; j++) - syme->count[j] = top->zero ? 0 : syme->count[j] * 7 / 8; - } else - perf_top__remove_active_sym(top, syme); - } - - return sum_ksamples; -} - -/* - * Find the longest symbol name that will be displayed - */ -void perf_top__find_widths(struct perf_top *top, struct rb_root *root, - int *dso_width, int *dso_short_width, int *sym_width) -{ - struct rb_node *nd; - int printed = 0; - - *sym_width = *dso_width = *dso_short_width = 0; - - for (nd = rb_first(root); nd; nd = rb_next(nd)) { - struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); - struct symbol *sym = sym_entry__symbol(syme); - - if (++printed > top->print_entries || - (int)syme->snap_count < top->count_filter) - continue; - - if (syme->map->dso->long_name_len > *dso_width) - *dso_width = syme->map->dso->long_name_len; - - if (syme->map->dso->short_name_len > *dso_short_width) - *dso_short_width = syme->map->dso->short_name_len; - - if (sym->namelen > *sym_width) - *sym_width = sym->namelen; - } -} diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index b07b0410463c..01d1057f3074 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -4,64 +4,32 @@ #include "types.h" #include "../perf.h" #include -#include -#include -#include struct perf_evlist; struct perf_evsel; struct perf_session; -struct sym_entry { - struct rb_node rb_node; - struct list_head node; - unsigned long snap_count; - double weight; - struct map *map; - unsigned long count[0]; -}; - -static inline struct symbol *sym_entry__symbol(struct sym_entry *self) -{ - return ((void *)self) + symbol_conf.priv_size; -} - struct perf_top { struct perf_evlist *evlist; /* * Symbols will be added here in perf_event__process_sample and will * get out after decayed. */ - struct list_head active_symbols; - pthread_mutex_t active_symbols_lock; - pthread_cond_t active_symbols_cond; u64 samples; u64 kernel_samples, us_samples; u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; u64 total_lost_warned; int print_entries, count_filter, delay_secs; - int display_weighted, freq, rb_entries; + int freq; pid_t target_pid, target_tid; bool hide_kernel_symbols, hide_user_symbols, zero; const char *cpu_list; - struct sym_entry *sym_filter_entry; + struct hist_entry *sym_filter_entry; struct perf_evsel *sym_evsel; struct perf_session *session; }; size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); void perf_top__reset_sample_counters(struct perf_top *top); -float perf_top__decay_samples(struct perf_top *top, struct rb_root *root); -void perf_top__find_widths(struct perf_top *top, struct rb_root *root, - int *dso_width, int *dso_short_width, int *sym_width); - -#ifdef NO_NEWT_SUPPORT -static inline int perf_top__tui_browser(struct perf_top *top __used) -{ - return 0; -} -#else -int perf_top__tui_browser(struct perf_top *top); -#endif #endif /* __PERF_TOP_H */ diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c deleted file mode 100644 index d43875b2356f..000000000000 --- a/tools/perf/util/ui/browsers/top.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo - * - * Parts came from builtin-{top,stat,record}.c, see those files for further - * copyright notes. - * - * Released under the GPL v2. (and only v2, not any later version) - */ -#include "../browser.h" -#include "../../annotate.h" -#include "../helpline.h" -#include "../libslang.h" -#include "../util.h" -#include "../ui.h" -#include "../../evlist.h" -#include "../../hist.h" -#include "../../sort.h" -#include "../../symbol.h" -#include "../../session.h" -#include "../../top.h" - -struct perf_top_browser { - struct ui_browser b; - struct rb_root root; - struct sym_entry *selection; - float sum_ksamples; - int dso_width; - int dso_short_width; - int sym_width; -}; - -static void perf_top_browser__write(struct ui_browser *browser, void *entry, int row) -{ - struct perf_top_browser *top_browser = container_of(browser, struct perf_top_browser, b); - struct sym_entry *syme = rb_entry(entry, struct sym_entry, rb_node); - bool current_entry = ui_browser__is_current_entry(browser, row); - struct symbol *symbol = sym_entry__symbol(syme); - struct perf_top *top = browser->priv; - int width = browser->width; - double pcnt; - - pcnt = 100.0 - (100.0 * ((top_browser->sum_ksamples - syme->snap_count) / - top_browser->sum_ksamples)); - ui_browser__set_percent_color(browser, pcnt, current_entry); - - if (top->evlist->nr_entries == 1 || !top->display_weighted) { - slsmg_printf("%20.2f ", syme->weight); - width -= 21; - } else { - slsmg_printf("%9.1f %10ld ", syme->weight, syme->snap_count); - width -= 20; - } - - slsmg_printf("%4.1f%%", pcnt); - width -= 7; - - if (verbose) { - slsmg_printf(" %016" PRIx64, symbol->start); - width -= 17; - } - - slsmg_printf(" %-*.*s ", top_browser->sym_width, top_browser->sym_width, - symbol->name); - width -= top_browser->sym_width; - slsmg_write_nstring(width >= syme->map->dso->long_name_len ? - syme->map->dso->long_name : - syme->map->dso->short_name, width); - - if (current_entry) - top_browser->selection = syme; -} - -static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) -{ - struct perf_top *top = browser->b.priv; - u64 top_idx = browser->b.top_idx; - - browser->root = RB_ROOT; - browser->b.top = NULL; - browser->sum_ksamples = perf_top__decay_samples(top, &browser->root); - /* - * No active symbols - */ - if (top->rb_entries == 0) - return; - - perf_top__find_widths(top, &browser->root, &browser->dso_width, - &browser->dso_short_width, - &browser->sym_width); - if (browser->sym_width + browser->dso_width > browser->b.width - 29) { - browser->dso_width = browser->dso_short_width; - if (browser->sym_width + browser->dso_width > browser->b.width - 29) - browser->sym_width = browser->b.width - browser->dso_width - 29; - } - - /* - * Adjust the ui_browser indexes since the entries in the browser->root - * rb_tree may have changed, then seek it from start, so that we get a - * possible new top of the screen. - */ - browser->b.nr_entries = top->rb_entries; - - if (top_idx >= browser->b.nr_entries) { - if (browser->b.height >= browser->b.nr_entries) - top_idx = browser->b.nr_entries - browser->b.height; - else - top_idx = 0; - } - - if (browser->b.index >= top_idx + browser->b.height) - browser->b.index = top_idx + browser->b.index - browser->b.top_idx; - - if (browser->b.index >= browser->b.nr_entries) - browser->b.index = browser->b.nr_entries - 1; - - browser->b.top_idx = top_idx; - browser->b.seek(&browser->b, top_idx, SEEK_SET); -} - -static void perf_top_browser__annotate(struct perf_top_browser *browser) -{ - struct sym_entry *syme = browser->selection; - struct symbol *sym = sym_entry__symbol(syme); - struct annotation *notes = symbol__annotation(sym); - struct perf_top *top = browser->b.priv; - - if (notes->src != NULL) - goto do_annotation; - - pthread_mutex_lock(¬es->lock); - - top->sym_filter_entry = NULL; - - if (symbol__alloc_hist(sym, top->evlist->nr_entries) < 0) { - pr_err("Not enough memory for annotating '%s' symbol!\n", - sym->name); - pthread_mutex_unlock(¬es->lock); - return; - } - - top->sym_filter_entry = syme; - - pthread_mutex_unlock(¬es->lock); -do_annotation: - symbol__tui_annotate(sym, syme->map, 0, NULL, NULL, top->delay_secs * 1000); -} - -static void perf_top_browser__warn_lost(struct perf_top_browser *browser) -{ - struct perf_top *top = browser->b.priv; - char msg[128]; - int len; - - top->total_lost_warned = top->session->hists.stats.total_lost; - pthread_mutex_lock(&ui__lock); - ui_browser__set_color(&browser->b, HE_COLORSET_TOP); - len = snprintf(msg, sizeof(msg), - " WARNING: LOST %" PRIu64 " events, Check IO/CPU overload", - top->total_lost_warned); - if (len > browser->b.width) - len = browser->b.width; - SLsmg_gotorc(0, browser->b.width - len); - slsmg_write_nstring(msg, len); - pthread_mutex_unlock(&ui__lock); -} - -static int perf_top_browser__run(struct perf_top_browser *browser) -{ - int key; - char title[160]; - struct perf_top *top = browser->b.priv; - int delay_msecs = top->delay_secs * 1000; - int exit_keys[] = { 'a', NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, }; - - perf_top_browser__update_rb_tree(browser); - perf_top__header_snprintf(top, title, sizeof(title)); - perf_top__reset_sample_counters(top); - - if (ui_browser__show(&browser->b, title, - "ESC: exit, ENTER|->|a: Live Annotate") < 0) - return -1; - - newtFormSetTimer(browser->b.form, delay_msecs); - ui_browser__add_exit_keys(&browser->b, exit_keys); - - while (1) { - key = ui_browser__run(&browser->b); - - switch (key) { - case -1: - /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */ - perf_top_browser__update_rb_tree(browser); - perf_top__header_snprintf(top, title, sizeof(title)); - perf_top__reset_sample_counters(top); - ui_browser__set_color(&browser->b, NEWT_COLORSET_ROOT); - SLsmg_gotorc(0, 0); - slsmg_write_nstring(title, browser->b.width); - - if (top->total_lost_warned != top->session->hists.stats.total_lost) - perf_top_browser__warn_lost(browser); - break; - case 'a': - case NEWT_KEY_RIGHT: - case NEWT_KEY_ENTER: - if (browser->selection) - perf_top_browser__annotate(browser); - break; - case NEWT_KEY_LEFT: - continue; - case NEWT_KEY_ESCAPE: - if (!ui__dialog_yesno("Do you really want to exit?")) - continue; - /* Fall thru */ - default: - goto out; - } - } -out: - ui_browser__hide(&browser->b); - return key; -} - -int perf_top__tui_browser(struct perf_top *top) -{ - struct perf_top_browser browser = { - .b = { - .entries = &browser.root, - .refresh = ui_browser__rb_tree_refresh, - .seek = ui_browser__rb_tree_seek, - .write = perf_top_browser__write, - .priv = top, - }, - }; - - return perf_top_browser__run(&browser); -} From 19d4ac3c1039fb952f4c073fd0898e9295a836c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 19:30:22 -0300 Subject: [PATCH 07/16] perf top: Add callgraph support Just like in 'perf report', but live. Still needs to decay the callchains, but already somewhat useful as-is. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cj3rmaf5jpsvi3v0tf7t4uvp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 15 +++ tools/perf/builtin-top.c | 146 +++++++++++++++++++++++++- 2 files changed, 159 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index d146ba33d691..e2e5cd0023cd 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -126,6 +126,21 @@ Default is to monitor all CPUS. --symbols:: Only consider these symbols. +-G [type,min,order]:: +--call-graph:: + Display call chains using type, min percent threshold and order. + type can be either: + - flat: single column, linear exposure of call chains. + - graph: use a graph tree, displaying absolute overhead rates. + - fractal: like graph, but displays relative rates. Each branch of + the tree is considered as a new profiled object. + + order can be either: + - callee: callee based call graph. + - caller: inverted caller based call graph. + + Default: fractal,0.5,callee. + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2cf5e50a6997..b9b7fe085895 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -76,6 +76,12 @@ static bool system_wide = false; static bool use_tui, use_stdio; +static bool sort_has_symbols; + +static bool dont_use_callchains; +static char callchain_default_opt[] = "fractal,0.5,callee"; + + static int default_interval = 0; static bool kptr_restrict_warned; @@ -648,9 +654,11 @@ static void perf_event__process_sample(const union perf_event *event, struct perf_sample *sample, struct perf_session *session) { + struct symbol *parent = NULL; u64 ip = event->ip.ip; struct addr_location al; struct machine *machine; + int err; u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; ++top.samples; @@ -748,13 +756,29 @@ static void perf_event__process_sample(const union perf_event *event, evsel = perf_evlist__id2evsel(top.evlist, sample->id); assert(evsel != NULL); + if ((sort__has_parent || symbol_conf.use_callchain) && + sample->callchain) { + err = perf_session__resolve_callchain(session, al.thread, + sample->callchain, &parent); + if (err) + return; + } + he = perf_session__add_hist_entry(session, &al, sample, evsel); if (he == NULL) { pr_err("Problem incrementing symbol period, skipping event\n"); return; } - record_precise_ip(he, evsel->idx, ip); + if (symbol_conf.use_callchain) { + err = callchain_append(he->callchain, &session->callchain_cursor, + sample->period); + if (err) + return; + } + + if (sort_has_symbols) + record_precise_ip(he, evsel->idx, ip); } return; @@ -808,6 +832,9 @@ static void start_counters(struct perf_evlist *evlist) attr->read_format |= PERF_FORMAT_ID; } + if (symbol_conf.use_callchain) + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = 1; attr->comm = 1; attr->inherit = inherit; @@ -864,10 +891,27 @@ out_err: exit(0); } +static int setup_sample_type(void) +{ + if (!sort_has_symbols) { + if (symbol_conf.use_callchain) { + ui__warning("Selected -g but \"sym\" not present in --sort/-s."); + return -EINVAL; + } + } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE) { + if (callchain_register_param(&callchain_param) < 0) { + ui__warning("Can't register callchain params.\n"); + return -EINVAL; + } + } + + return 0; +} + static int __cmd_top(void) { pthread_t thread; - int ret __used; + int ret; /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. @@ -876,6 +920,10 @@ static int __cmd_top(void) if (top.session == NULL) return -ENOMEM; + ret = setup_sample_type(); + if (ret) + goto out_delete; + if (top.target_tid != -1) perf_event__synthesize_thread_map(top.evlist->threads, perf_event__process, top.session); @@ -916,6 +964,90 @@ static int __cmd_top(void) ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100); } +out_delete: + perf_session__delete(top.session); + top.session = NULL; + + return 0; +} + +static int +parse_callchain_opt(const struct option *opt __used, const char *arg, + int unset) +{ + char *tok, *tok2; + char *endptr; + + /* + * --no-call-graph + */ + if (unset) { + dont_use_callchains = true; + return 0; + } + + symbol_conf.use_callchain = true; + + if (!arg) + return 0; + + tok = strtok((char *)arg, ","); + if (!tok) + return -1; + + /* get the output mode */ + if (!strncmp(tok, "graph", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_ABS; + + else if (!strncmp(tok, "flat", strlen(arg))) + callchain_param.mode = CHAIN_FLAT; + + else if (!strncmp(tok, "fractal", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_REL; + + else if (!strncmp(tok, "none", strlen(arg))) { + callchain_param.mode = CHAIN_NONE; + symbol_conf.use_callchain = false; + + return 0; + } + + else + return -1; + + /* get the min percentage */ + tok = strtok(NULL, ","); + if (!tok) + goto setup; + + callchain_param.min_percent = strtod(tok, &endptr); + if (tok == endptr) + return -1; + + /* get the print limit */ + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + + if (tok2[0] != 'c') { + callchain_param.print_limit = strtod(tok2, &endptr); + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + } + + /* get the call chain order */ + if (!strcmp(tok2, "caller")) + callchain_param.order = ORDER_CALLER; + else if (!strcmp(tok2, "callee")) + callchain_param.order = ORDER_CALLEE; + else + return -1; +setup: + if (callchain_register_param(&callchain_param) < 0) { + fprintf(stderr, "Can't register callchain params\n"); + return -1; + } return 0; } @@ -973,6 +1105,10 @@ static const struct option options[] = { "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), + OPT_CALLBACK_DEFAULT('G', "call-graph", NULL, "output_type,min_percent, call_order", + "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. " + "Default: fractal,0.5,callee", &parse_callchain_opt, + callchain_default_opt), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", @@ -1082,6 +1218,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + /* + * Avoid annotation data structures overhead when symbols aren't on the + * sort list. + */ + sort_has_symbols = sort_sym.list.next != NULL; + get_term_dimensions(&winsize); if (top.print_entries == 0) { update_print_entries(&winsize); From 34958544b3da17e98d98d7b872c6516995ad66bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 19:35:54 -0300 Subject: [PATCH 08/16] perf annotate browser: Allow navigation to called functions I.e. when in the annotate TUI window, if Enter is pressed over an assembly line with a 'callq' it will try to open another TUI window with that symbol. This is just a proof of concept and works only on x86_64, more work is needed to support kernel modules, userland, other arches, etc, but should already be useful as-is. Suggested-by: Ingo Molnar Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-opyvskw5na3qdmkv8vxi3zbr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 9 ++-- tools/perf/util/annotate.h | 2 +- tools/perf/util/hist.h | 4 +- tools/perf/util/ui/browsers/annotate.c | 69 ++++++++++++++++++++++---- tools/perf/util/ui/browsers/hists.c | 20 +++++--- 5 files changed, 79 insertions(+), 25 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 39e3d382b2d8..24db9d2db7b4 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -114,7 +114,8 @@ static int hist_entry__tty_annotate(struct hist_entry *he, int evidx) print_line, full_paths, 0, 0); } -static void hists__find_annotations(struct hists *self, int evidx) +static void hists__find_annotations(struct hists *self, int evidx, + int nr_events) { struct rb_node *nd = rb_first(&self->entries), *next; int key = KEY_RIGHT; @@ -137,7 +138,8 @@ find_next: } if (use_browser > 0) { - key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0); + key = hist_entry__tui_annotate(he, evidx, nr_events, + NULL, NULL, 0); switch (key) { case KEY_RIGHT: next = rb_next(nd); @@ -215,7 +217,8 @@ static int __cmd_annotate(void) total_nr_samples += nr_samples; hists__collapse_resort(hists); hists__output_resort(hists); - hists__find_annotations(hists, pos->idx); + hists__find_annotations(hists, pos->idx, + session->evlist->nr_entries); } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index aafbc7e9a9ba..d9072523d342 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -99,7 +99,7 @@ static inline int symbol__tui_annotate(struct symbol *sym __used, } #else int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, - void(*timer)(void *arg), void *arg, + int nr_events, void(*timer)(void *arg), void *arg, int delay_secs); #endif diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f87677bfaff9..7ea1e560e008 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -110,7 +110,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used, } static inline int hist_entry__tui_annotate(struct hist_entry *self __used, - int evidx __used, + int evidx __used, int nr_events __used, void(*timer)(void *arg) __used, void *arg __used, int delay_secs __used); { @@ -120,7 +120,7 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used, #define KEY_RIGHT -2 #else #include -int hist_entry__tui_annotate(struct hist_entry *he, int evidx, +int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events, void(*timer)(void *arg), void *arg, int delay_secs); #define KEY_LEFT NEWT_KEY_LEFT diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 76c1d083aa94..77421bab8fdd 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -20,6 +20,7 @@ struct annotate_browser { struct ui_browser b; struct rb_root entries; struct rb_node *curr_hot; + struct objdump_line *selection; }; struct objdump_line_rb_node { @@ -36,6 +37,7 @@ struct objdump_line_rb_node *objdump_line__rb(struct objdump_line *self) static void annotate_browser__write(struct ui_browser *self, void *entry, int row) { + struct annotate_browser *ab = container_of(self, struct annotate_browser, b); struct objdump_line *ol = rb_entry(entry, struct objdump_line, node); bool current_entry = ui_browser__is_current_entry(self, row); int width = self->width; @@ -58,6 +60,8 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro if (!current_entry) ui_browser__set_color(self, HE_COLORSET_CODE); + else + ab->selection = ol; } static double objdump_line__calc_percent(struct objdump_line *self, @@ -141,7 +145,8 @@ static void annotate_browser__set_top(struct annotate_browser *self, static void annotate_browser__calc_percent(struct annotate_browser *browser, int evidx) { - struct symbol *sym = browser->b.priv; + struct map_symbol *ms = browser->b.priv; + struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); struct objdump_line *pos; @@ -164,17 +169,18 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, } static int annotate_browser__run(struct annotate_browser *self, int evidx, - void(*timer)(void *arg) __used, void *arg __used, - int delay_secs) + int nr_events, void(*timer)(void *arg), + void *arg, int delay_secs) { struct rb_node *nd = NULL; - struct symbol *sym = self->b.priv; + struct map_symbol *ms = self->b.priv; + struct symbol *sym = ms->sym; /* * RIGHT To allow builtin-annotate to cycle thru multiple symbols by * examining the exit key for this function. */ int exit_keys[] = { 'H', NEWT_KEY_TAB, NEWT_KEY_UNTAB, - NEWT_KEY_RIGHT, 0 }; + NEWT_KEY_RIGHT, NEWT_KEY_ENTER, 0 }; int key; if (ui_browser__show(&self->b, sym->name, @@ -238,6 +244,42 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, case 'H': nd = self->curr_hot; break; + case NEWT_KEY_ENTER: + if (self->selection != NULL) { + char *s = strstr(self->selection->line, "callq "); + struct annotation *notes; + struct symbol *target; + u64 ip; + + if (s == NULL) + continue; + + s = strchr(s, ' '); + if (s++ == NULL) + continue; + + ip = strtoull(s, NULL, 16); + ip = ms->map->map_ip(ms->map, ip); + target = map__find_symbol(ms->map, ip, NULL); + if (target == NULL) + continue; + + notes = symbol__annotation(target); + pthread_mutex_lock(¬es->lock); + + if (notes->src == NULL && + symbol__alloc_hist(target, nr_events) < 0) { + pthread_mutex_unlock(¬es->lock); + ui__warning("Not enough memory for annotating '%s' symbol!\n", + target->name); + continue; + } + + pthread_mutex_unlock(¬es->lock); + symbol__tui_annotate(target, ms->map, evidx, nr_events, + timer, arg, delay_secs); + } + break; default: goto out; } @@ -250,25 +292,29 @@ out: return key; } -int hist_entry__tui_annotate(struct hist_entry *he, int evidx, +int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events, void(*timer)(void *arg), void *arg, int delay_secs) { - return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, + return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, nr_events, timer, arg, delay_secs); } int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, - void(*timer)(void *arg), void *arg, - int delay_secs) + int nr_events, void(*timer)(void *arg), void *arg, + int delay_secs) { struct objdump_line *pos, *n; struct annotation *notes; + struct map_symbol ms = { + .map = map, + .sym = sym, + }; struct annotate_browser browser = { .b = { .refresh = ui_browser__list_head_refresh, .seek = ui_browser__list_head_seek, .write = annotate_browser__write, - .priv = sym, + .priv = &ms, }, }; int ret; @@ -300,7 +346,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ - ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs); + ret = annotate_browser__run(&browser, evidx, nr_events, + timer, arg, delay_secs); list_for_each_entry_safe(pos, n, ¬es->src->source, node) { list_del(&pos->node); objdump_line__free(pos); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 6244d19bd1f2..f0515cbabda5 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -825,7 +825,7 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, return printed; } -static int perf_evsel__hists_browse(struct perf_evsel *evsel, +static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, const char *helpline, const char *ev_name, bool left_exits, void(*timer)(void *arg), void *arg, @@ -968,7 +968,7 @@ do_annotate: if (he == NULL) continue; - hist_entry__tui_annotate(he, evsel->idx, + hist_entry__tui_annotate(he, evsel->idx, nr_events, timer, arg, delay_secs); } else if (choice == browse_map) map__browse(browser->selection->map); @@ -1042,7 +1042,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser, menu->selection = evsel; } -static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help, +static int perf_evsel_menu__run(struct perf_evsel_menu *menu, + int nr_events, const char *help, void(*timer)(void *arg), void *arg, int delay_secs) { int exit_keys[] = { NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, }; @@ -1077,8 +1078,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help, perf_evlist__set_selected(evlist, pos); browse_hists: ev_name = event_name(pos); - key = perf_evsel__hists_browse(pos, help, ev_name, true, - timer, arg, delay_secs); + key = perf_evsel__hists_browse(pos, nr_events, help, + ev_name, true, timer, + arg, delay_secs); ui_browser__show_title(&menu->b, title); break; case NEWT_KEY_LEFT: @@ -1150,7 +1152,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, pos->name = strdup(ev_name); } - return perf_evsel_menu__run(&menu, help, timer, arg, delay_secs); + return perf_evsel_menu__run(&menu, evlist->nr_entries, help, timer, + arg, delay_secs); } int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, @@ -1162,8 +1165,9 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct perf_evsel *first = list_entry(evlist->entries.next, struct perf_evsel, node); const char *ev_name = event_name(first); - return perf_evsel__hists_browse(first, help, ev_name, false, - timer, arg, delay_secs); + return perf_evsel__hists_browse(first, evlist->nr_entries, help, + ev_name, false, timer, arg, + delay_secs); } return __perf_evlist__tui_browse_hists(evlist, help, From 8b1bfdbdb3041c0503c42ef49bab25caabeaa558 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Oct 2011 19:41:31 -0300 Subject: [PATCH 09/16] perf top: Use the TUI interface by default To disable it either: 1. Make sure newt-devel is not installed when building it 2. Use 'perf top --stdio' just like with report 3. Edit your ~/.perfconfig or system wide config and have this there: [tui] top = off But you shouldn't, since the TUI is so much more powerful, has integration with annotation and where lots more interesting features will be developed, so if something annoys you (the colors?) just let me know and I'll do my best to make it pleasant as a default. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cy2tn4uj1t7c3aqss5l25of5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b9b7fe085895..cc877bc83ebd 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1140,13 +1140,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) setup_sorting(top_usage, options); - /* - * XXX For now start disabled, only using TUI if explicitely asked for. - * Change that when handle_keys equivalent gets written, live annotation - * done, etc. - */ - use_browser = 0; - if (use_stdio) use_browser = 0; else if (use_tui) From e39622ceb169467dbe3d11491745aa1f7f3a92ad Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 3 Oct 2011 11:38:15 +0200 Subject: [PATCH 10/16] perf tools: Fix broken number of samples for perf report -n The perf report -n option was broken because it was not reporting the correct number of samples depending on the sorting mode. By default, samples are sorted by comm,dso,sym. That means that samples for the same command (binary) get collapsed. The hists__collapse_insert_entry() had a bug whereby it was aggregating the number of events observed (periods) but not the number of samples. Consequently, the number of samples reported could be below reality. The percentage remained correct because based on the periods. This patch fixes the problem by also aggregating the number of samples. Here is an example: $ perf report -n --stdio 12.38% 842 pong [kernel.kallsyms] [k] __lock_acquire Here pong (a ctxsw stress test), is the only program running and thus it is the only one responsible for the lock_acquire samples. If we change the sorting mode: $ perf report -n --stdio --sort=sym 12.38% 1732 [k] __lock_acquire The actual number of samples is shown. With the fix: $ perf report -n --stdio 12.38% 1732 pong [kernel.kallsyms] [k] __lock_acquire Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20111003093815.GA6393@quad Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 87ef5c7797de..50c8fece1681 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -281,6 +281,7 @@ static bool hists__collapse_insert_entry(struct hists *hists, if (!cmp) { iter->period += he->period; + iter->nr_events += he->nr_events; if (symbol_conf.use_callchain) { callchain_cursor_reset(&hists->callchain_cursor); callchain_merge(&hists->callchain_cursor, iter->callchain, From 234a5375f6e7dee1f8bd5a7a2f5107c1c265ad8e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Oct 2011 09:45:29 -0300 Subject: [PATCH 11/16] perf annotate browser: Use -> to navigate on assembly lines And add better explanations when the line isn't actionable, like non assembly lines and on other instructions. Suggested-by: Ingo Molnar Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-375n844b5wra7lgq08ou153j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ui/browsers/annotate.c | 27 ++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 77421bab8fdd..674b55e686fd 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -184,8 +184,8 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, int key; if (ui_browser__show(&self->b, sym->name, - "<-, -> or ESC: exit, TAB/shift+TAB: " - "cycle hottest lines, H: Hottest") < 0) + "<- or ESC: exit, TAB/shift+TAB: " + "cycle hottest lines, H: Hottest, -> Line action") < 0) return -1; ui_browser__add_exit_keys(&self->b, exit_keys); @@ -245,24 +245,39 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, nd = self->curr_hot; break; case NEWT_KEY_ENTER: - if (self->selection != NULL) { + case NEWT_KEY_RIGHT: + if (self->selection == NULL) { + ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); + continue; + } + + if (self->selection->offset == -1) { + ui_helpline__puts("Actions are only available for assembly lines."); + continue; + } else { char *s = strstr(self->selection->line, "callq "); struct annotation *notes; struct symbol *target; u64 ip; - if (s == NULL) + if (s == NULL) { + ui_helpline__puts("Actions are only available for the 'callq' instruction."); continue; + } s = strchr(s, ' '); - if (s++ == NULL) + if (s++ == NULL) { + ui_helpline__puts("Invallid callq instruction."); continue; + } ip = strtoull(s, NULL, 16); ip = ms->map->map_ip(ms->map, ip); target = map__find_symbol(ms->map, ip, NULL); - if (target == NULL) + if (target == NULL) { + ui_helpline__puts("The called function was not found."); continue; + } notes = symbol__annotation(target); pthread_mutex_lock(¬es->lock); From 724c9c9f20c7a0ebd9a728391f5adcfd4a76628b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Oct 2011 10:36:35 -0300 Subject: [PATCH 12/16] perf hists browser: Don't offer symbol actions when symbols not on --sort Removing all the entries that only apply to symbols from the menu. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-7bap0cy2fxtorlj5hgsp48m1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ui/browsers/hists.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index f0515cbabda5..b7901099eedb 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -26,6 +26,7 @@ struct hist_browser { struct map_symbol *selection; const struct thread *thread_filter; const struct dso *dso_filter; + bool has_symbols; }; static int hists__browser_title(struct hists *self, char *bf, size_t size, @@ -302,9 +303,9 @@ static int hist_browser__run(struct hist_browser *self, const char *ev_name, int key; int delay_msecs = delay_secs * 1000; char title[160]; - int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', - NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, - NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0, }; + int sym_exit_keys[] = { 'a', 'h', 'C', 'd', 'E', 't', 0, }; + int exit_keys[] = { '?', 'h', 'D', NEWT_KEY_LEFT, NEWT_KEY_RIGHT, + NEWT_KEY_TAB, NEWT_KEY_UNTAB, NEWT_KEY_ENTER, 0, }; self->b.entries = &self->hists->entries; self->b.nr_entries = self->hists->nr_entries; @@ -321,6 +322,8 @@ static int hist_browser__run(struct hist_browser *self, const char *ev_name, newtFormSetTimer(self->b.form, delay_msecs); ui_browser__add_exit_keys(&self->b, exit_keys); + if (self->has_symbols) + ui_browser__add_exit_keys(&self->b, sym_exit_keys); while (1) { key = ui_browser__run(&self->b); @@ -783,6 +786,7 @@ static struct hist_browser *hist_browser__new(struct hists *hists) self->hists = hists; self->b.refresh = hist_browser__refresh; self->b.seek = ui_browser__hists_seek; + self->has_symbols = sort_sym.list.next != NULL; } return self; @@ -881,16 +885,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, case NEWT_KEY_F1: case 'h': case '?': - ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" + ui__help_window("h/?/F1 Show this window\n" + "TAB/UNTAB Switch events\n" + "q/CTRL+C Exit browser\n\n" + "For symbolic views (--sort has sym):\n\n" + "-> Zoom into DSO/Threads & Annotate current symbol\n" "<- Zoom out\n" "a Annotate current symbol\n" - "h/?/F1 Show this window\n" "C Collapse all callchains\n" "E Expand all callchains\n" "d Zoom into current DSO\n" - "t Zoom into current Thread\n" - "TAB/UNTAB Switch events\n" - "q/CTRL+C Exit browser"); + "t Zoom into current Thread\n"); continue; case NEWT_KEY_ENTER: case NEWT_KEY_RIGHT: @@ -923,6 +928,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, goto out_free_stack; } + if (!browser->has_symbols) + goto add_exit_option; + if (browser->selection != NULL && browser->selection->sym != NULL && !browser->selection->map->dso->annotate_warned && @@ -947,7 +955,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, browser->selection->map != NULL && asprintf(&options[nr_options], "Browse map details") > 0) browse_map = nr_options++; - +add_exit_option: options[nr_options++] = (char *)"Exit"; choice = ui__popup_menu(nr_options, options); From 7d16320e2315db434b28854c08677e0d15b604f1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Oct 2011 10:46:45 -0300 Subject: [PATCH 13/16] perf hists browser: Fix TAB/UNTAB use with multiple events When requesting multiple events, say: # perf top -e instructions -e cycles -e cache-misses The first screen lets the user chose what to see first, then to switch one can either use the left key to get back to the event menu or simply use TAB to go the next and shift+TAB to go the prev. When using TAB/UNTAB the call to perf_evlist__set_selected(event) was missing, fix it. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3xqqh3fwmt914gg43frey14y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ui/browsers/hists.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index b7901099eedb..ef6ccefad432 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -1107,12 +1107,14 @@ browse_hists: pos = list_entry(evlist->entries.next, struct perf_evsel, node); else pos = list_entry(pos->node.next, struct perf_evsel, node); + perf_evlist__set_selected(evlist, pos); goto browse_hists; case NEWT_KEY_UNTAB: if (pos->node.prev == &evlist->entries) pos = list_entry(evlist->entries.prev, struct perf_evsel, node); else pos = list_entry(pos->node.prev, struct perf_evsel, node); + perf_evlist__set_selected(evlist, pos); goto browse_hists; case 'q': case CTRL('c'): From be83f5ed6bc46cd89b4a102b6e341ecddf7abf91 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Oct 2011 10:56:05 -0300 Subject: [PATCH 14/16] perf hists browser: Update the browser.nr_entries after the timer Previously the hist_browser dealt with a static tree of entries, now it needs to update the nr_entries in the browser after the timer runs. A better solution will come when moving using another thread for the collapse_resort, etc, but for now this is ok. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9eno2iq55sjr4iyo899buzaw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ui/browsers/hists.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index ef6ccefad432..e64d9527f14e 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -332,6 +332,13 @@ static int hist_browser__run(struct hist_browser *self, const char *ev_name, case -1: /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */ timer(arg); + /* + * The timer may have changed the number of entries. + * XXX: Find better way to keep this in synch, probably + * removing this timer function altogether and just sync + * using the hists->lock... + */ + self->b.nr_entries = self->hists->nr_entries; hists__browser_title(self->hists, title, sizeof(title), ev_name, self->dso_filter, self->thread_filter); From fbe96f29ce4b33e0a22219cc7f5996d9157717e3 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 30 Sep 2011 15:40:40 +0200 Subject: [PATCH 15/16] perf tools: Make perf.data more self-descriptive (v8) The goal of this patch is to include more information about the host environment into the perf.data so it is more self-descriptive. Overtime, profiles are captured on various machines and it becomes hard to track what was recorded, on what machine and when. This patch provides a way to solve this by extending the perf.data file with basic information about the host machine. To add those extensions, we leverage the feature bits capabilities of the perf.data format. The change is backward compatible with existing perf.data files. We define the following useful new extensions: - HEADER_HOSTNAME: the hostname - HEADER_OSRELEASE: the kernel release number - HEADER_ARCH: the hw architecture - HEADER_CPUDESC: generic CPU description - HEADER_NRCPUS: number of online/avail cpus - HEADER_CMDLINE: perf command line - HEADER_VERSION: perf version - HEADER_TOPOLOGY: cpu topology - HEADER_EVENT_DESC: full event description (attrs) - HEADER_CPUID: easy-to-parse low level CPU identication The small granularity for the entries is to make it easier to extend without breaking backward compatiblity. Many entries are provided as ASCII strings. Perf report/script have been modified to print the basic information as easy-to-parse ASCII strings. Extended information about CPU and NUMA topology may be requested with the -I option. Thanks to David Ahern for reviewing and testing the many versions of this patch. $ perf report --stdio # ======== # captured on : Mon Sep 26 15:22:14 2011 # hostname : quad # os release : 3.1.0-rc4-tip # perf version : 3.1.0-rc4 # arch : x86_64 # nrcpus online : 4 # nrcpus avail : 4 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz # cpuid : GenuineIntel,6,15,11 # total memory : 8105360 kB # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31, # HEADER_CPU_TOPOLOGY info available, use -I to display # HEADER_NUMA_TOPOLOGY info available, use -I to display # ======== # ... $ perf report --stdio -I # ======== # captured on : Mon Sep 26 15:22:14 2011 # hostname : quad # os release : 3.1.0-rc4-tip # perf version : 3.1.0-rc4 # arch : x86_64 # nrcpus online : 4 # nrcpus avail : 4 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz # cpuid : GenuineIntel,6,15,11 # total memory : 8105360 kB # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31, # sibling cores : 0-3 # sibling threads : 0 # sibling threads : 1 # sibling threads : 2 # sibling threads : 3 # node0 meminfo : total = 8320608 kB, free = 7571024 kB # node0 cpu list : 0-3 # ======== # ... Reviewed-by: David Ahern Tested-by: David Ahern Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Robert Richter Cc: Andi Kleen Link: http://lkml.kernel.org/r/20110930134040.GA5575@quad Signed-off-by: Stephane Eranian [ committer notes: Use --show-info in the tools as was in the docs, rename perf_header_fprintf_info to perf_file_section__fprintf_info, fixup conflict with f69b64f7 "perf: Support setting the disassembler style" ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 6 + tools/perf/Documentation/perf-script.txt | 7 + tools/perf/arch/powerpc/Makefile | 1 + tools/perf/arch/powerpc/util/header.c | 36 + tools/perf/arch/x86/Makefile | 1 + tools/perf/arch/x86/util/header.c | 59 ++ tools/perf/builtin-record.c | 15 + tools/perf/builtin-report.c | 6 + tools/perf/builtin-script.c | 6 +- tools/perf/builtin.h | 1 - tools/perf/perf.h | 11 + tools/perf/util/header.c | 1145 +++++++++++++++++++++- tools/perf/util/header.h | 29 +- tools/perf/util/session.c | 19 + tools/perf/util/session.h | 1 + 15 files changed, 1308 insertions(+), 35 deletions(-) create mode 100644 tools/perf/arch/powerpc/util/header.c create mode 100644 tools/perf/arch/x86/util/header.c diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4e82c19cbbbe..4ed17072ea88 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -139,6 +139,12 @@ OPTIONS --show-total-period:: Show a column with the sum of periods. +-I:: +--show-info:: + Display extended information about the perf.data file. This adds + information which may be very large and thus may clutter the display. + It currently includes: cpu and numa topology of the host system. + SEE ALSO -------- linkperf:perf-stat[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index db017867d9e8..dec87ecb530e 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -188,6 +188,13 @@ OPTIONS CPUs are specified with -: 0-2. Default is to report samples on all CPUs. +-I:: +--show-info:: + Display extended information about the perf.data file. This adds + information which may be very large and thus may clutter the display. + It currently includes: cpu and numa topology of the host system. + It can only be used with the perf script report mode. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 15130b50dfe3..744e629797be 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -2,3 +2,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c new file mode 100644 index 000000000000..eba80c292945 --- /dev/null +++ b/tools/perf/arch/powerpc/util/header.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include + +#include "../../util/header.h" + +#define __stringify_1(x) #x +#define __stringify(x) __stringify_1(x) + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," __stringify(rn) \ + : "=r" (rval)); rval; }) + +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +int +get_cpuid(char *buffer, size_t sz) +{ + unsigned long pvr; + int nb; + + pvr = mfspr(SPRN_PVR); + + nb = snprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr)); + + /* look for end marker to ensure the entire data fit */ + if (strchr(buffer, '$')) { + buffer[nb-1] = '\0'; + return 0; + } + return -1; +} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 15130b50dfe3..744e629797be 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -2,3 +2,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c new file mode 100644 index 000000000000..f94006068d2b --- /dev/null +++ b/tools/perf/arch/x86/util/header.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +#include "../../util/header.h" + +static inline void +cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, + unsigned int *d) +{ + __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t" + "movl %%ebx, %%esi\n\t.byte 0x5b" + : "=a" (*a), + "=S" (*b), + "=c" (*c), + "=d" (*d) + : "a" (op)); +} + +int +get_cpuid(char *buffer, size_t sz) +{ + unsigned int a, b, c, d, lvl; + int family = -1, model = -1, step = -1; + int nb; + char vendor[16]; + + cpuid(0, &lvl, &b, &c, &d); + strncpy(&vendor[0], (char *)(&b), 4); + strncpy(&vendor[4], (char *)(&d), 4); + strncpy(&vendor[8], (char *)(&c), 4); + vendor[12] = '\0'; + + if (lvl >= 1) { + cpuid(1, &a, &b, &c, &d); + + family = (a >> 8) & 0xf; /* bits 11 - 8 */ + model = (a >> 4) & 0xf; /* Bits 7 - 4 */ + step = a & 0xf; + + /* extended family */ + if (family == 0xf) + family += (a >> 20) & 0xff; + + /* extended model */ + if (family >= 0x6) + model += ((a >> 16) & 0xf) << 4; + } + nb = snprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step); + + /* look for end marker to ensure the entire data fit */ + if (strchr(buffer, '$')) { + buffer[nb-1] = '\0'; + return 0; + } + return -1; +} diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index dd6467872f60..f82480fa7f27 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -529,6 +529,19 @@ static int __cmd_record(int argc, const char **argv) if (have_tracepoints(&evsel_list->entries)) perf_header__set_feat(&session->header, HEADER_TRACE_INFO); + perf_header__set_feat(&session->header, HEADER_HOSTNAME); + perf_header__set_feat(&session->header, HEADER_OSRELEASE); + perf_header__set_feat(&session->header, HEADER_ARCH); + perf_header__set_feat(&session->header, HEADER_CPUDESC); + perf_header__set_feat(&session->header, HEADER_NRCPUS); + perf_header__set_feat(&session->header, HEADER_EVENT_DESC); + perf_header__set_feat(&session->header, HEADER_CMDLINE); + perf_header__set_feat(&session->header, HEADER_VERSION); + perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); + perf_header__set_feat(&session->header, HEADER_TOTAL_MEM); + perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY); + perf_header__set_feat(&session->header, HEADER_CPUID); + /* 512 kiB: default amount of unprivileged mlocked memory */ if (mmap_pages == UINT_MAX) mmap_pages = (512 * 1024) / page_size; @@ -800,6 +813,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) int err = -ENOMEM; struct perf_evsel *pos; + perf_header__set_cmdline(argc, argv); + evsel_list = perf_evlist__new(NULL, NULL); if (evsel_list == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e7140c6289b8..66fe822b181d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -40,6 +40,7 @@ static char const *input_name = "perf.data"; static bool force, use_tui, use_stdio; static bool hide_unresolved; static bool dont_use_callchains; +static bool show_full_info; static bool show_threads; static struct perf_read_values show_threads_values; @@ -273,6 +274,9 @@ static int __cmd_report(void) goto out_delete; } + if (use_browser <= 0) + perf_session__fprintf_info(session, stdout, show_full_info); + if (show_threads) perf_read_values_init(&show_threads_values); @@ -485,6 +489,8 @@ static const struct option options[] = { OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), + OPT_BOOLEAN('I', "show-info", &show_full_info, + "Display extended information about perf.data file"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 09024ec2ab2e..2f62a2952269 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -22,6 +22,7 @@ static u64 last_timestamp; static u64 nr_unordered; extern const struct option record_options[]; static bool no_callchain; +static bool show_full_info; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -1083,7 +1084,8 @@ static const struct option options[] = { "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", parse_output_fields), OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), - + OPT_BOOLEAN('I', "show-info", &show_full_info, + "display extended information from perf.data file"), OPT_END() }; @@ -1268,6 +1270,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) return -1; } + perf_session__fprintf_info(session, stdout, show_full_info); + if (!no_callchain) symbol_conf.use_callchain = true; else diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 4702e2443a8e..b382bd551aac 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -4,7 +4,6 @@ #include "util/util.h" #include "util/strbuf.h" -extern const char perf_version_string[]; extern const char perf_usage_string[]; extern const char perf_more_info_string[]; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index a5fc660c1f12..08b0b5e82a44 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -9,18 +9,21 @@ void get_term_dimensions(struct winsize *ws); #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#define CPUINFO_PROC "model name" #endif #if defined(__x86_64__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#define CPUINFO_PROC "model name" #endif #ifdef __powerpc__ #include "../../arch/powerpc/include/asm/unistd.h" #define rmb() asm volatile ("sync" ::: "memory") #define cpu_relax() asm volatile ("" ::: "memory"); +#define CPUINFO_PROC "cpu" #endif #ifdef __s390__ @@ -37,30 +40,35 @@ void get_term_dimensions(struct winsize *ws); # define rmb() asm volatile("" ::: "memory") #endif #define cpu_relax() asm volatile("" ::: "memory") +#define CPUINFO_PROC "cpu type" #endif #ifdef __hppa__ #include "../../arch/parisc/include/asm/unistd.h" #define rmb() asm volatile("" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); +#define CPUINFO_PROC "cpu" #endif #ifdef __sparc__ #include "../../arch/sparc/include/asm/unistd.h" #define rmb() asm volatile("":::"memory") #define cpu_relax() asm volatile("":::"memory") +#define CPUINFO_PROC "cpu" #endif #ifdef __alpha__ #include "../../arch/alpha/include/asm/unistd.h" #define rmb() asm volatile("mb" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory") +#define CPUINFO_PROC "cpu model" #endif #ifdef __ia64__ #include "../../arch/ia64/include/asm/unistd.h" #define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") +#define CPUINFO_PROC "model name" #endif #ifdef __arm__ @@ -71,6 +79,7 @@ void get_term_dimensions(struct winsize *ws); */ #define rmb() ((void(*)(void))0xffff0fa0)() #define cpu_relax() asm volatile("":::"memory") +#define CPUINFO_PROC "Processor" #endif #ifdef __mips__ @@ -83,6 +92,7 @@ void get_term_dimensions(struct winsize *ws); : /* no input */ \ : "memory") #define cpu_relax() asm volatile("" ::: "memory") +#define CPUINFO_PROC "cpu model" #endif #include @@ -171,5 +181,6 @@ struct ip_callchain { }; extern bool perf_host, perf_guest; +extern const char perf_version_string[]; #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b6c1ad123ca9..f2ceb0f7d669 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "evlist.h" #include "evsel.h" @@ -17,12 +18,19 @@ #include "session.h" #include "symbol.h" #include "debug.h" +#include "cpumap.h" static bool no_buildid_cache = false; static int event_count; static struct perf_trace_event_type *events; +static u32 header_argc; +static const char **header_argv; + +static int dsos__write_buildid_table(struct perf_header *header, int fd); +static int perf_session__cache_build_ids(struct perf_session *session); + int perf_header__push_event(u64 id, const char *name) { if (strlen(name) > MAX_EVENT_NAME) @@ -110,6 +118,1020 @@ static int write_padded(int fd, const void *bf, size_t count, return err; } +static int do_write_string(int fd, const char *str) +{ + u32 len, olen; + int ret; + + olen = strlen(str) + 1; + len = ALIGN(olen, NAME_ALIGN); + + /* write len, incl. \0 */ + ret = do_write(fd, &len, sizeof(len)); + if (ret < 0) + return ret; + + return write_padded(fd, str, olen, len); +} + +static char *do_read_string(int fd, struct perf_header *ph) +{ + ssize_t sz, ret; + u32 len; + char *buf; + + sz = read(fd, &len, sizeof(len)); + if (sz < (ssize_t)sizeof(len)) + return NULL; + + if (ph->needs_swap) + len = bswap_32(len); + + buf = malloc(len); + if (!buf) + return NULL; + + ret = read(fd, buf, len); + if (ret == (ssize_t)len) { + /* + * strings are padded by zeroes + * thus the actual strlen of buf + * may be less than len + */ + return buf; + } + + free(buf); + return NULL; +} + +int +perf_header__set_cmdline(int argc, const char **argv) +{ + int i; + + header_argc = (u32)argc; + + /* do not include NULL termination */ + header_argv = calloc(argc, sizeof(char *)); + if (!header_argv) + return -ENOMEM; + + /* + * must copy argv contents because it gets moved + * around during option parsing + */ + for (i = 0; i < argc ; i++) + header_argv[i] = argv[i]; + + return 0; +} + +static int write_trace_info(int fd, struct perf_header *h __used, + struct perf_evlist *evlist) +{ + return read_tracing_data(fd, &evlist->entries); +} + + +static int write_build_id(int fd, struct perf_header *h, + struct perf_evlist *evlist __used) +{ + struct perf_session *session; + int err; + + session = container_of(h, struct perf_session, header); + + err = dsos__write_buildid_table(h, fd); + if (err < 0) { + pr_debug("failed to write buildid table\n"); + return err; + } + if (!no_buildid_cache) + perf_session__cache_build_ids(session); + + return 0; +} + +static int write_hostname(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct utsname uts; + int ret; + + ret = uname(&uts); + if (ret < 0) + return -1; + + return do_write_string(fd, uts.nodename); +} + +static int write_osrelease(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct utsname uts; + int ret; + + ret = uname(&uts); + if (ret < 0) + return -1; + + return do_write_string(fd, uts.release); +} + +static int write_arch(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct utsname uts; + int ret; + + ret = uname(&uts); + if (ret < 0) + return -1; + + return do_write_string(fd, uts.machine); +} + +static int write_version(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + return do_write_string(fd, perf_version_string); +} + +static int write_cpudesc(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ +#ifndef CPUINFO_PROC +#define CPUINFO_PROC NULL +#endif + FILE *file; + char *buf = NULL; + char *s, *p; + const char *search = CPUINFO_PROC; + size_t len = 0; + int ret = -1; + + if (!search) + return -1; + + file = fopen("/proc/cpuinfo", "r"); + if (!file) + return -1; + + while (getline(&buf, &len, file) > 0) { + ret = strncmp(buf, search, strlen(search)); + if (!ret) + break; + } + + if (ret) + goto done; + + s = buf; + + p = strchr(buf, ':'); + if (p && *(p+1) == ' ' && *(p+2)) + s = p + 2; + p = strchr(s, '\n'); + if (p) + *p = '\0'; + + /* squash extra space characters (branding string) */ + p = s; + while (*p) { + if (isspace(*p)) { + char *r = p + 1; + char *q = r; + *p = ' '; + while (*q && isspace(*q)) + q++; + if (q != (p+1)) + while ((*r++ = *q++)); + } + p++; + } + ret = do_write_string(fd, s); +done: + free(buf); + fclose(file); + return ret; +} + +static int write_nrcpus(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + long nr; + u32 nrc, nra; + int ret; + + nr = sysconf(_SC_NPROCESSORS_CONF); + if (nr < 0) + return -1; + + nrc = (u32)(nr & UINT_MAX); + + nr = sysconf(_SC_NPROCESSORS_ONLN); + if (nr < 0) + return -1; + + nra = (u32)(nr & UINT_MAX); + + ret = do_write(fd, &nrc, sizeof(nrc)); + if (ret < 0) + return ret; + + return do_write(fd, &nra, sizeof(nra)); +} + +static int write_event_desc(int fd, struct perf_header *h __used, + struct perf_evlist *evlist) +{ + struct perf_evsel *attr; + u32 nre = 0, nri, sz; + int ret; + + list_for_each_entry(attr, &evlist->entries, node) + nre++; + + /* + * write number of events + */ + ret = do_write(fd, &nre, sizeof(nre)); + if (ret < 0) + return ret; + + /* + * size of perf_event_attr struct + */ + sz = (u32)sizeof(attr->attr); + ret = do_write(fd, &sz, sizeof(sz)); + if (ret < 0) + return ret; + + list_for_each_entry(attr, &evlist->entries, node) { + + ret = do_write(fd, &attr->attr, sz); + if (ret < 0) + return ret; + /* + * write number of unique id per event + * there is one id per instance of an event + * + * copy into an nri to be independent of the + * type of ids, + */ + nri = attr->ids; + ret = do_write(fd, &nri, sizeof(nri)); + if (ret < 0) + return ret; + + /* + * write event string as passed on cmdline + */ + ret = do_write_string(fd, attr->name); + if (ret < 0) + return ret; + /* + * write unique ids for this event + */ + ret = do_write(fd, attr->id, attr->ids * sizeof(u64)); + if (ret < 0) + return ret; + } + return 0; +} + +static int write_cmdline(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + char buf[MAXPATHLEN]; + char proc[32]; + u32 i, n; + int ret; + + /* + * actual atual path to perf binary + */ + sprintf(proc, "/proc/%d/exe", getpid()); + ret = readlink(proc, buf, sizeof(buf)); + if (ret <= 0) + return -1; + + /* readlink() does not add null termination */ + buf[ret] = '\0'; + + /* account for binary path */ + n = header_argc + 1; + + ret = do_write(fd, &n, sizeof(n)); + if (ret < 0) + return ret; + + ret = do_write_string(fd, buf); + if (ret < 0) + return ret; + + for (i = 0 ; i < header_argc; i++) { + ret = do_write_string(fd, header_argv[i]); + if (ret < 0) + return ret; + } + return 0; +} + +#define CORE_SIB_FMT \ + "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list" +#define THRD_SIB_FMT \ + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" + +struct cpu_topo { + u32 core_sib; + u32 thread_sib; + char **core_siblings; + char **thread_siblings; +}; + +static int build_cpu_topo(struct cpu_topo *tp, int cpu) +{ + FILE *fp; + char filename[MAXPATHLEN]; + char *buf = NULL, *p; + size_t len = 0; + u32 i = 0; + int ret = -1; + + sprintf(filename, CORE_SIB_FMT, cpu); + fp = fopen(filename, "r"); + if (!fp) + return -1; + + if (getline(&buf, &len, fp) <= 0) + goto done; + + fclose(fp); + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->core_sib; i++) { + if (!strcmp(buf, tp->core_siblings[i])) + break; + } + if (i == tp->core_sib) { + tp->core_siblings[i] = buf; + tp->core_sib++; + buf = NULL; + len = 0; + } + + sprintf(filename, THRD_SIB_FMT, cpu); + fp = fopen(filename, "r"); + if (!fp) + goto done; + + if (getline(&buf, &len, fp) <= 0) + goto done; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->thread_sib; i++) { + if (!strcmp(buf, tp->thread_siblings[i])) + break; + } + if (i == tp->thread_sib) { + tp->thread_siblings[i] = buf; + tp->thread_sib++; + buf = NULL; + } + ret = 0; +done: + if(fp) + fclose(fp); + free(buf); + return ret; +} + +static void free_cpu_topo(struct cpu_topo *tp) +{ + u32 i; + + if (!tp) + return; + + for (i = 0 ; i < tp->core_sib; i++) + free(tp->core_siblings[i]); + + for (i = 0 ; i < tp->thread_sib; i++) + free(tp->thread_siblings[i]); + + free(tp); +} + +static struct cpu_topo *build_cpu_topology(void) +{ + struct cpu_topo *tp; + void *addr; + u32 nr, i; + size_t sz; + long ncpus; + int ret = -1; + + ncpus = sysconf(_SC_NPROCESSORS_CONF); + if (ncpus < 0) + return NULL; + + nr = (u32)(ncpus & UINT_MAX); + + sz = nr * sizeof(char *); + + addr = calloc(1, sizeof(*tp) + 2 * sz); + if (!addr) + return NULL; + + tp = addr; + + addr += sizeof(*tp); + tp->core_siblings = addr; + addr += sz; + tp->thread_siblings = addr; + + for (i = 0; i < nr; i++) { + ret = build_cpu_topo(tp, i); + if (ret < 0) + break; + } + if (ret) { + free_cpu_topo(tp); + tp = NULL; + } + return tp; +} + +static int write_cpu_topology(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct cpu_topo *tp; + u32 i; + int ret; + + tp = build_cpu_topology(); + if (!tp) + return -1; + + ret = do_write(fd, &tp->core_sib, sizeof(tp->core_sib)); + if (ret < 0) + goto done; + + for (i = 0; i < tp->core_sib; i++) { + ret = do_write_string(fd, tp->core_siblings[i]); + if (ret < 0) + goto done; + } + ret = do_write(fd, &tp->thread_sib, sizeof(tp->thread_sib)); + if (ret < 0) + goto done; + + for (i = 0; i < tp->thread_sib; i++) { + ret = do_write_string(fd, tp->thread_siblings[i]); + if (ret < 0) + break; + } +done: + free_cpu_topo(tp); + return ret; +} + + + +static int write_total_mem(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + char *buf = NULL; + FILE *fp; + size_t len = 0; + int ret = -1, n; + uint64_t mem; + + fp = fopen("/proc/meminfo", "r"); + if (!fp) + return -1; + + while (getline(&buf, &len, fp) > 0) { + ret = strncmp(buf, "MemTotal:", 9); + if (!ret) + break; + } + if (!ret) { + n = sscanf(buf, "%*s %"PRIu64, &mem); + if (n == 1) + ret = do_write(fd, &mem, sizeof(mem)); + } + free(buf); + fclose(fp); + return ret; +} + +static int write_topo_node(int fd, int node) +{ + char str[MAXPATHLEN]; + char field[32]; + char *buf = NULL, *p; + size_t len = 0; + FILE *fp; + u64 mem_total, mem_free, mem; + int ret = -1; + + sprintf(str, "/sys/devices/system/node/node%d/meminfo", node); + fp = fopen(str, "r"); + if (!fp) + return -1; + + while (getline(&buf, &len, fp) > 0) { + /* skip over invalid lines */ + if (!strchr(buf, ':')) + continue; + if (sscanf(buf, "%*s %*d %s %"PRIu64, field, &mem) != 2) + goto done; + if (!strcmp(field, "MemTotal:")) + mem_total = mem; + if (!strcmp(field, "MemFree:")) + mem_free = mem; + } + + fclose(fp); + + ret = do_write(fd, &mem_total, sizeof(u64)); + if (ret) + goto done; + + ret = do_write(fd, &mem_free, sizeof(u64)); + if (ret) + goto done; + + ret = -1; + sprintf(str, "/sys/devices/system/node/node%d/cpulist", node); + + fp = fopen(str, "r"); + if (!fp) + goto done; + + if (getline(&buf, &len, fp) <= 0) + goto done; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + ret = do_write_string(fd, buf); +done: + free(buf); + fclose(fp); + return ret; +} + +static int write_numa_topology(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + char *buf = NULL; + size_t len = 0; + FILE *fp; + struct cpu_map *node_map = NULL; + char *c; + u32 nr, i, j; + int ret = -1; + + fp = fopen("/sys/devices/system/node/online", "r"); + if (!fp) + return -1; + + if (getline(&buf, &len, fp) <= 0) + goto done; + + c = strchr(buf, '\n'); + if (c) + *c = '\0'; + + node_map = cpu_map__new(buf); + if (!node_map) + goto done; + + nr = (u32)node_map->nr; + + ret = do_write(fd, &nr, sizeof(nr)); + if (ret < 0) + goto done; + + for (i = 0; i < nr; i++) { + j = (u32)node_map->map[i]; + ret = do_write(fd, &j, sizeof(j)); + if (ret < 0) + break; + + ret = write_topo_node(fd, i); + if (ret < 0) + break; + } +done: + free(buf); + fclose(fp); + free(node_map); + return ret; +} + +/* + * default get_cpuid(): nothing gets recorded + * actual implementation must be in arch/$(ARCH)/util/header.c + */ +int __attribute__((weak)) get_cpuid(char *buffer __used, size_t sz __used) +{ + return -1; +} + +static int write_cpuid(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + char buffer[64]; + int ret; + + ret = get_cpuid(buffer, sizeof(buffer)); + if (!ret) + goto write_it; + + return -1; +write_it: + return do_write_string(fd, buffer); +} + +static void print_hostname(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# hostname : %s\n", str); + free(str); +} + +static void print_osrelease(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# os release : %s\n", str); + free(str); +} + +static void print_arch(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# arch : %s\n", str); + free(str); +} + +static void print_cpudesc(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# cpudesc : %s\n", str); + free(str); +} + +static void print_nrcpus(struct perf_header *ph, int fd, FILE *fp) +{ + ssize_t ret; + u32 nr; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + nr = -1; /* interpreted as error */ + + if (ph->needs_swap) + nr = bswap_32(nr); + + fprintf(fp, "# nrcpus online : %u\n", nr); + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + nr = -1; /* interpreted as error */ + + if (ph->needs_swap) + nr = bswap_32(nr); + + fprintf(fp, "# nrcpus avail : %u\n", nr); +} + +static void print_version(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# perf version : %s\n", str); + free(str); +} + +static void print_cmdline(struct perf_header *ph, int fd, FILE *fp) +{ + ssize_t ret; + char *str; + u32 nr, i; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + return; + + if (ph->needs_swap) + nr = bswap_32(nr); + + fprintf(fp, "# cmdline : "); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + fprintf(fp, "%s ", str); + free(str); + } + fputc('\n', fp); +} + +static void print_cpu_topology(struct perf_header *ph, int fd, FILE *fp) +{ + ssize_t ret; + u32 nr, i; + char *str; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + return; + + if (ph->needs_swap) + nr = bswap_32(nr); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + fprintf(fp, "# sibling cores : %s\n", str); + free(str); + } + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + return; + + if (ph->needs_swap) + nr = bswap_32(nr); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + fprintf(fp, "# sibling threads : %s\n", str); + free(str); + } +} + +static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +{ + struct perf_event_attr attr; + uint64_t id; + void *buf = NULL; + char *str; + u32 nre, sz, nr, i, j, msz; + int ret; + + /* number of events */ + ret = read(fd, &nre, sizeof(nre)); + if (ret != (ssize_t)sizeof(nre)) + goto error; + + if (ph->needs_swap) + nre = bswap_32(nre); + + ret = read(fd, &sz, sizeof(sz)); + if (ret != (ssize_t)sizeof(sz)) + goto error; + + if (ph->needs_swap) + sz = bswap_32(sz); + + /* + * ensure it is at least to our ABI rev + */ + if (sz < (u32)sizeof(attr)) + goto error; + + memset(&attr, 0, sizeof(attr)); + + /* read entire region to sync up to next field */ + buf = malloc(sz); + if (!buf) + goto error; + + msz = sizeof(attr); + if (sz < msz) + msz = sz; + + for (i = 0 ; i < nre; i++) { + + ret = read(fd, buf, sz); + if (ret != (ssize_t)sz) + goto error; + + if (ph->needs_swap) + perf_event__attr_swap(buf); + + memcpy(&attr, buf, msz); + + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + goto error; + + if (ph->needs_swap) + nr = bswap_32(nr); + + str = do_read_string(fd, ph); + fprintf(fp, "# event : name = %s, ", str); + free(str); + + fprintf(fp, "type = %d, config = 0x%"PRIx64 + ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, + attr.type, + (u64)attr.config, + (u64)attr.config1, + (u64)attr.config2); + + fprintf(fp, ", excl_usr = %d, excl_kern = %d", + attr.exclude_user, + attr.exclude_kernel); + + if (nr) + fprintf(fp, ", id = {"); + + for (j = 0 ; j < nr; j++) { + ret = read(fd, &id, sizeof(id)); + if (ret != (ssize_t)sizeof(id)) + goto error; + + if (ph->needs_swap) + id = bswap_64(id); + + if (j) + fputc(',', fp); + + fprintf(fp, " %"PRIu64, id); + } + if (nr && j == nr) + fprintf(fp, " }"); + fputc('\n', fp); + } + free(buf); + return; +error: + fprintf(fp, "# event desc: not available or unable to read\n"); +} + +static void print_total_mem(struct perf_header *h __used, int fd, FILE *fp) +{ + uint64_t mem; + ssize_t ret; + + ret = read(fd, &mem, sizeof(mem)); + if (ret != sizeof(mem)) + goto error; + + if (h->needs_swap) + mem = bswap_64(mem); + + fprintf(fp, "# total memory : %"PRIu64" kB\n", mem); + return; +error: + fprintf(fp, "# total memory : unknown\n"); +} + +static void print_numa_topology(struct perf_header *h __used, int fd, FILE *fp) +{ + ssize_t ret; + u32 nr, c, i; + char *str; + uint64_t mem_total, mem_free; + + /* nr nodes */ + ret = read(fd, &nr, sizeof(nr)); + if (ret != (ssize_t)sizeof(nr)) + goto error; + + if (h->needs_swap) + nr = bswap_32(nr); + + for (i = 0; i < nr; i++) { + + /* node number */ + ret = read(fd, &c, sizeof(c)); + if (ret != (ssize_t)sizeof(c)) + goto error; + + if (h->needs_swap) + c = bswap_32(c); + + ret = read(fd, &mem_total, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + ret = read(fd, &mem_free, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + if (h->needs_swap) { + mem_total = bswap_64(mem_total); + mem_free = bswap_64(mem_free); + } + + fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB," + " free = %"PRIu64" kB\n", + c, + mem_total, + mem_free); + + str = do_read_string(fd, h); + fprintf(fp, "# node%u cpu list : %s\n", c, str); + free(str); + } + return; +error: + fprintf(fp, "# numa topology : not available\n"); +} + +static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) +{ + char *str = do_read_string(fd, ph); + fprintf(fp, "# cpuid : %s\n", str); + free(str); +} + +struct feature_ops { + int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); + void (*print)(struct perf_header *h, int fd, FILE *fp); + const char *name; + bool full_only; +}; + +#define FEAT_OPA(n, w, p) \ + [n] = { .name = #n, .write = w, .print = p } +#define FEAT_OPF(n, w, p) \ + [n] = { .name = #n, .write = w, .print = p, .full_only = true } + +static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { + FEAT_OPA(HEADER_TRACE_INFO, write_trace_info, NULL), + FEAT_OPA(HEADER_BUILD_ID, write_build_id, NULL), + FEAT_OPA(HEADER_HOSTNAME, write_hostname, print_hostname), + FEAT_OPA(HEADER_OSRELEASE, write_osrelease, print_osrelease), + FEAT_OPA(HEADER_VERSION, write_version, print_version), + FEAT_OPA(HEADER_ARCH, write_arch, print_arch), + FEAT_OPA(HEADER_NRCPUS, write_nrcpus, print_nrcpus), + FEAT_OPA(HEADER_CPUDESC, write_cpudesc, print_cpudesc), + FEAT_OPA(HEADER_CPUID, write_cpuid, print_cpuid), + FEAT_OPA(HEADER_TOTAL_MEM, write_total_mem, print_total_mem), + FEAT_OPA(HEADER_EVENT_DESC, write_event_desc, print_event_desc), + FEAT_OPA(HEADER_CMDLINE, write_cmdline, print_cmdline), + FEAT_OPF(HEADER_CPU_TOPOLOGY, write_cpu_topology, print_cpu_topology), + FEAT_OPF(HEADER_NUMA_TOPOLOGY, write_numa_topology, print_numa_topology), +}; + +struct header_print_data { + FILE *fp; + bool full; /* extended list of headers */ +}; + +static int perf_file_section__fprintf_info(struct perf_file_section *section, + struct perf_header *ph, + int feat, int fd, void *data) +{ + struct header_print_data *hd = data; + + if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { + pr_debug("Failed to lseek to %" PRIu64 " offset for feature " + "%d, continuing...\n", section->offset, feat); + return 0; + } + if (feat < HEADER_TRACE_INFO || feat >= HEADER_LAST_FEATURE) { + pr_warning("unknown feature %d\n", feat); + return -1; + } + if (!feat_ops[feat].print) + return 0; + + if (!feat_ops[feat].full_only || hd->full) + feat_ops[feat].print(ph, fd, hd->fp); + else + fprintf(hd->fp, "# %s info available, use -I to display\n", + feat_ops[feat].name); + + return 0; +} + +int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) +{ + struct header_print_data hd; + struct perf_header *header = &session->header; + int fd = session->fd; + hd.fp = fp; + hd.full = full; + + perf_header__process_sections(header, fd, &hd, + perf_file_section__fprintf_info); + return 0; +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ @@ -356,15 +1378,41 @@ static bool perf_session__read_build_ids(struct perf_session *session, bool with return ret; } +static int do_write_feat(int fd, struct perf_header *h, int type, + struct perf_file_section **p, + struct perf_evlist *evlist) +{ + int err; + int ret = 0; + + if (perf_header__has_feat(h, type)) { + + (*p)->offset = lseek(fd, 0, SEEK_CUR); + + err = feat_ops[type].write(fd, h, evlist); + if (err < 0) { + pr_debug("failed to write feature %d\n", type); + + /* undo anything written */ + lseek(fd, (*p)->offset, SEEK_SET); + + return -1; + } + (*p)->size = lseek(fd, 0, SEEK_CUR) - (*p)->offset; + (*p)++; + } + return ret; +} + static int perf_header__adds_write(struct perf_header *header, struct perf_evlist *evlist, int fd) { int nr_sections; struct perf_session *session; - struct perf_file_section *feat_sec; + struct perf_file_section *feat_sec, *p; int sec_size; u64 sec_start; - int idx = 0, err; + int err; session = container_of(header, struct perf_session, header); @@ -376,7 +1424,7 @@ static int perf_header__adds_write(struct perf_header *header, if (!nr_sections) return 0; - feat_sec = calloc(sizeof(*feat_sec), nr_sections); + feat_sec = p = calloc(sizeof(*feat_sec), nr_sections); if (feat_sec == NULL) return -ENOMEM; @@ -385,36 +1433,69 @@ static int perf_header__adds_write(struct perf_header *header, sec_start = header->data_offset + header->data_size; lseek(fd, sec_start + sec_size, SEEK_SET); - if (perf_header__has_feat(header, HEADER_TRACE_INFO)) { - struct perf_file_section *trace_sec; + err = do_write_feat(fd, header, HEADER_TRACE_INFO, &p, evlist); + if (err) + goto out_free; - trace_sec = &feat_sec[idx++]; - - /* Write trace info */ - trace_sec->offset = lseek(fd, 0, SEEK_CUR); - read_tracing_data(fd, &evlist->entries); - trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; + err = do_write_feat(fd, header, HEADER_BUILD_ID, &p, evlist); + if (err) { + perf_header__clear_feat(header, HEADER_BUILD_ID); + goto out_free; } - if (perf_header__has_feat(header, HEADER_BUILD_ID)) { - struct perf_file_section *buildid_sec; + err = do_write_feat(fd, header, HEADER_HOSTNAME, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_HOSTNAME); - buildid_sec = &feat_sec[idx++]; + err = do_write_feat(fd, header, HEADER_OSRELEASE, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_OSRELEASE); - /* Write build-ids */ - buildid_sec->offset = lseek(fd, 0, SEEK_CUR); - err = dsos__write_buildid_table(header, fd); - if (err < 0) { - pr_debug("failed to write buildid table\n"); - goto out_free; - } - buildid_sec->size = lseek(fd, 0, SEEK_CUR) - - buildid_sec->offset; - if (!no_buildid_cache) - perf_session__cache_build_ids(session); - } + err = do_write_feat(fd, header, HEADER_VERSION, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_VERSION); + + err = do_write_feat(fd, header, HEADER_ARCH, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_ARCH); + + err = do_write_feat(fd, header, HEADER_NRCPUS, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_NRCPUS); + + err = do_write_feat(fd, header, HEADER_CPUDESC, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_CPUDESC); + + err = do_write_feat(fd, header, HEADER_CPUID, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_CPUID); + + err = do_write_feat(fd, header, HEADER_TOTAL_MEM, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_TOTAL_MEM); + + err = do_write_feat(fd, header, HEADER_CMDLINE, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_CMDLINE); + + err = do_write_feat(fd, header, HEADER_EVENT_DESC, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_EVENT_DESC); + + err = do_write_feat(fd, header, HEADER_CPU_TOPOLOGY, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_CPU_TOPOLOGY); + + err = do_write_feat(fd, header, HEADER_NUMA_TOPOLOGY, &p, evlist); + if (err) + perf_header__clear_feat(header, HEADER_NUMA_TOPOLOGY); lseek(fd, sec_start, SEEK_SET); + /* + * may write more than needed due to dropped feature, but + * this is okay, reader will skip the mising entries + */ err = do_write(fd, feat_sec, sec_size); if (err < 0) pr_debug("failed to write feature section\n"); @@ -554,9 +1635,10 @@ static int perf_header__getbuffer64(struct perf_header *header, } int perf_header__process_sections(struct perf_header *header, int fd, + void *data, int (*process)(struct perf_file_section *section, - struct perf_header *ph, - int feat, int fd)) + struct perf_header *ph, + int feat, int fd, void *data)) { struct perf_file_section *feat_sec; int nr_sections; @@ -584,7 +1666,7 @@ int perf_header__process_sections(struct perf_header *header, int fd, if (perf_header__has_feat(header, feat)) { struct perf_file_section *sec = &feat_sec[idx++]; - err = process(sec, header, feat, fd); + err = process(sec, header, feat, fd, data); if (err < 0) break; } @@ -796,7 +1878,7 @@ out: static int perf_file_section__process(struct perf_file_section *section, struct perf_header *ph, - int feat, int fd) + int feat, int fd, void *data __used) { if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { pr_debug("Failed to lseek to %" PRIu64 " offset for feature " @@ -935,7 +2017,8 @@ int perf_session__read_header(struct perf_session *session, int fd) event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - perf_header__process_sections(header, fd, perf_file_section__process); + perf_header__process_sections(header, fd, NULL, + perf_file_section__process); lseek(fd, header->data_offset, SEEK_SET); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 1886256768a1..3d5a742f4a2a 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -12,6 +12,20 @@ enum { HEADER_TRACE_INFO = 1, HEADER_BUILD_ID, + + HEADER_HOSTNAME, + HEADER_OSRELEASE, + HEADER_VERSION, + HEADER_ARCH, + HEADER_NRCPUS, + HEADER_CPUDESC, + HEADER_CPUID, + HEADER_TOTAL_MEM, + HEADER_CMDLINE, + HEADER_EVENT_DESC, + HEADER_CPU_TOPOLOGY, + HEADER_NUMA_TOPOLOGY, + HEADER_LAST_FEATURE, }; @@ -68,10 +82,15 @@ void perf_header__set_feat(struct perf_header *header, int feat); void perf_header__clear_feat(struct perf_header *header, int feat); bool perf_header__has_feat(const struct perf_header *header, int feat); +int perf_header__set_cmdline(int argc, const char **argv); + int perf_header__process_sections(struct perf_header *header, int fd, + void *data, int (*process)(struct perf_file_section *section, - struct perf_header *ph, - int feat, int fd)); + struct perf_header *ph, + int feat, int fd, void *data)); + +int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, const char *name, bool is_kallsyms); @@ -104,4 +123,10 @@ int perf_event__synthesize_build_id(struct dso *pos, u16 misc, struct perf_session *session); int perf_event__process_build_id(union perf_event *event, struct perf_session *session); + +/* + * arch specific callback + */ +int get_cpuid(char *buffer, size_t sz); + #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 72458d9da5b1..20e011c99a94 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1326,3 +1326,22 @@ int perf_session__cpu_bitmap(struct perf_session *session, return 0; } + +void perf_session__fprintf_info(struct perf_session *session, FILE *fp, + bool full) +{ + struct stat st; + int ret; + + if (session == NULL || fp == NULL) + return; + + ret = fstat(session->fd, &st); + if (ret == -1) + return; + + fprintf(fp, "# ========\n"); + fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); + perf_header__fprintf_info(session, fp, full); + fprintf(fp, "# ========\n#\n"); +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 974d0cbee5e9..514b06d41f05 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -177,4 +177,5 @@ void perf_session__print_ip(union perf_event *event, int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); +void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full); #endif /* __PERF_SESSION_H */ From 64c6f0c7f8db449e05ee16e35a7083df69addd1d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Oct 2011 12:48:31 -0300 Subject: [PATCH 16/16] perf tools: Make --no-asm-raw the default And add the annotation output knobs to all the tools that have integrated annotation (top, report). Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-gnlob67mke6sji2kf4nstp7m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 3 +-- tools/perf/Documentation/perf-report.txt | 9 ++++++++- tools/perf/Documentation/perf-top.txt | 10 ++++++++++ tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-report.c | 4 ++++ tools/perf/builtin-top.c | 6 ++++++ tools/perf/util/symbol.c | 1 - 7 files changed, 31 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 0102d83600db..fe6762ed56bd 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -73,8 +73,7 @@ OPTIONS CPUs. --asm-raw:: - Show raw instruction encoding of assembly instructions. They - are displayed by default, disable with --no-asm-raw. + Show raw instruction encoding of assembly instructions. --source:: Interleave source code with assembly code. Enabled by default, diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4ed17072ea88..212f24d672e1 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -137,6 +137,13 @@ OPTIONS -M:: --disassembler-style=:: Set disassembler style for objdump. +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + --show-total-period:: Show a column with the sum of periods. -I:: @@ -147,4 +154,4 @@ OPTIONS SEE ALSO -------- -linkperf:perf-stat[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index e2e5cd0023cd..b1a5bbbfebef 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -126,6 +126,16 @@ Default is to monitor all CPUS. --symbols:: Only consider these symbols. +-M:: +--disassembler-style=:: Set disassembler style for objdump. + +--source:: + Interleave source code with assembly code. Enabled by default, + disable with --no-source. + +--asm-raw:: + Show raw instruction encoding of assembly instructions. + -G [type,min,order]:: --call-graph:: Display call chains using type, min percent threshold and order. diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 24db9d2db7b4..3ea764a78053 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -272,9 +272,9 @@ static const struct option options[] = { OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), - OPT_BOOLEAN('0', "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN('0', "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 66fe822b181d..4d7c8340c326 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -491,6 +491,10 @@ static const struct option options[] = { OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &show_full_info, "Display extended information about perf.data file"), + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + "Interleave source code with assembly code (default)"), + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cc877bc83ebd..c5aebf6eb746 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1117,6 +1117,12 @@ static const struct option options[] = { "only consider symbols in these comms"), OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + "Interleave source code with assembly code (default)"), + OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + "Display raw encoding of assembly instructions (default)"), + OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_END() }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 077df15ee705..3f09a23f71b4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -46,7 +46,6 @@ struct symbol_conf symbol_conf = { .exclude_other = true, .use_modules = true, .try_vmlinux_path = true, - .annotate_asm_raw = true, .annotate_src = true, .symfs = "", };