From f66fedcb723a9f118170200e21dbabb305f8c702 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 20 Aug 2011 14:39:23 +0900 Subject: [PATCH 01/12] perf probe: Fix regression of variable finder Fix to call convert_variable() if previous call does not fail. To call convert_variable, it ensures "ret" is 0. However, since "ret" has the return value of synthesize_perf_probe_arg() which always returns positive value if it succeeded, perf probe doesn't call convert_variable(). This will cause a SEGV when we add an event with arguments. This has to be fixed as it ensures "ret" is greater than 0 (or not negative). This regression has been introduced by my previous patch, f182e3e1. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110820053922.3286.65805.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 555fc3864b90..5d732621a462 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -659,7 +659,7 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) ret = -ENOENT; } - if (ret == 0) + if (ret >= 0) ret = convert_variable(&vr_die, pf); if (ret < 0) From adb091846318f86e4f46c7d6a7b40d2f478abdbe Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2011 16:40:14 +1000 Subject: [PATCH 02/12] perf symbols: Fix ppc64 SEGV in dso__load_sym with debuginfo files 64bit PowerPC debuginfo files have an empty function descriptor section. I hit a SEGV when perf tried to use this section for symbol resolution. To fix this we need to check the section is valid and we can do this by checking for type SHT_PROGBITS. Cc: Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Eric B Munson Link: http://lkml.kernel.org/r/20110824065242.895239970@samba.org Signed-off-by: Anton Blanchard Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 469c0264ed29..bb5d32f38af2 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1111,6 +1111,8 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, } opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx); + if (opdshdr.sh_type != SHT_PROGBITS) + opdsec = NULL; if (opdsec) opddata = elf_rawdata(opdsec, NULL); From 3f5a42722b9e78a434d5a4ee5e607dc33c69ac80 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2011 16:40:15 +1000 Subject: [PATCH 03/12] perf symbols: /proc/kallsyms does not sort module symbols kallsyms__parse assumes that /proc/kallsyms is sorted and sets the end of the previous symbol to the start of the current one. Unfortunately module symbols are not sorted, eg: ffffffffa0081f30 t e1000_clean_rx_irq [e1000e] ffffffffa00817a0 t e1000_alloc_rx_buffers [e1000e] Some symbols end up with a negative length and others have a length larger than they should. This results in confusing perf output. We already have a function to fixup the end of zero length symbols so use that instead. Cc: Eric B Munson Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110824065242.969681349@samba.org Signed-off-by: Anton Blanchard Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index bb5d32f38af2..f119e85dc6c2 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -438,18 +438,11 @@ int kallsyms__parse(const char *filename, void *arg, char *line = NULL; size_t n; int err = -1; - u64 prev_start = 0; - char prev_symbol_type = 0; - char *prev_symbol_name; FILE *file = fopen(filename, "r"); if (file == NULL) goto out_failure; - prev_symbol_name = malloc(KSYM_NAME_LEN); - if (prev_symbol_name == NULL) - goto out_close; - err = 0; while (!feof(file)) { @@ -480,24 +473,18 @@ int kallsyms__parse(const char *filename, void *arg, break; } - if (prev_symbol_type) { - u64 end = start; - if (end != prev_start) - --end; - err = process_symbol(arg, prev_symbol_name, - prev_symbol_type, prev_start, end); - if (err) - break; - } - - memcpy(prev_symbol_name, symbol_name, len + 1); - prev_symbol_type = symbol_type; - prev_start = start; + /* + * module symbols are not sorted so we add all + * symbols with zero length and rely on + * symbols__fixup_end() to fix it up. + */ + err = process_symbol(arg, symbol_name, + symbol_type, start, start); + if (err) + break; } - free(prev_symbol_name); free(line); -out_close: fclose(file); return err; @@ -703,6 +690,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; + symbols__fixup_end(&dso->symbols[map->type]); + if (dso->kernel == DSO_TYPE_GUEST_KERNEL) dso->symtab_type = SYMTAB__GUEST_KALLSYMS; else From 318779086086f46127a249878eeebb3dc80578eb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2011 16:40:16 +1000 Subject: [PATCH 04/12] perf symbols: Preserve symbol scope when parsing /proc/kallsyms kallsyms__parse capitalises the symbol type, so every symbol is marked global. Remove this and fix symbol_type__is_a to handle both local and global symbols. Cc: Eric B Munson Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110824065243.077125989@samba.org Signed-off-by: Anton Blanchard Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f119e85dc6c2..0d94ddb55bd7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -74,11 +74,13 @@ static void dso__set_sorted_by_name(struct dso *dso, enum map_type type) bool symbol_type__is_a(char symbol_type, enum map_type map_type) { + symbol_type = toupper(symbol_type); + switch (map_type) { case MAP__FUNCTION: return symbol_type == 'T' || symbol_type == 'W'; case MAP__VARIABLE: - return symbol_type == 'D' || symbol_type == 'd'; + return symbol_type == 'D'; default: return false; } @@ -463,7 +465,7 @@ int kallsyms__parse(const char *filename, void *arg, if (len + 2 >= line_len) continue; - symbol_type = toupper(line[len]); + symbol_type = line[len]; len += 2; symbol_name = line + len; len = line_len - len; From 694bf407b06113f5e0f71764756f11903126fec0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2011 16:40:17 +1000 Subject: [PATCH 05/12] perf symbols: Add some heuristics for choosing the best duplicate symbol Try and pick the best symbol based on a few heuristics: - Prefer a non weak symbol over a weak one - Prefer a global symbol over a non global one - Prefer a symbol with less underscores (idea taken from kallsyms.c) - If all else fails, choose the symbol with the longest name Cc: Eric B Munson Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110824065243.161953371@samba.org Signed-off-by: Anton Blanchard Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 88 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0d94ddb55bd7..870b3b8c649b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -86,6 +86,92 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type) } } +static int prefix_underscores_count(const char *str) +{ + const char *tail = str; + + while (*tail == '_') + tail++; + + return tail - str; +} + +#define SYMBOL_A 0 +#define SYMBOL_B 1 + +static int choose_best_symbol(struct symbol *syma, struct symbol *symb) +{ + s64 a; + s64 b; + + /* Prefer a symbol with non zero length */ + a = syma->end - syma->start; + b = symb->end - symb->start; + if ((b == 0) && (a > 0)) + return SYMBOL_A; + else if ((a == 0) && (b > 0)) + return SYMBOL_B; + + /* Prefer a non weak symbol over a weak one */ + a = syma->binding == STB_WEAK; + b = symb->binding == STB_WEAK; + if (b && !a) + return SYMBOL_A; + if (a && !b) + return SYMBOL_B; + + /* Prefer a global symbol over a non global one */ + a = syma->binding == STB_GLOBAL; + b = symb->binding == STB_GLOBAL; + if (a && !b) + return SYMBOL_A; + if (b && !a) + return SYMBOL_B; + + /* Prefer a symbol with less underscores */ + a = prefix_underscores_count(syma->name); + b = prefix_underscores_count(symb->name); + if (b > a) + return SYMBOL_A; + else if (a > b) + return SYMBOL_B; + + /* If all else fails, choose the symbol with the longest name */ + if (strlen(syma->name) >= strlen(symb->name)) + return SYMBOL_A; + else + return SYMBOL_B; +} + +static void symbols__fixup_duplicate(struct rb_root *symbols) +{ + struct rb_node *nd; + struct symbol *curr, *next; + + nd = rb_first(symbols); + + while (nd) { + curr = rb_entry(nd, struct symbol, rb_node); +again: + nd = rb_next(&curr->rb_node); + next = rb_entry(nd, struct symbol, rb_node); + + if (!nd) + break; + + if (curr->start != next->start) + continue; + + if (choose_best_symbol(curr, next) == SYMBOL_A) { + rb_erase(&next->rb_node, symbols); + goto again; + } else { + nd = rb_next(&curr->rb_node); + rb_erase(&curr->rb_node, symbols); + } + } +} + static void symbols__fixup_end(struct rb_root *symbols) { struct rb_node *nd, *prevnd = rb_first(symbols); @@ -692,6 +778,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (dso->kernel == DSO_TYPE_GUEST_KERNEL) @@ -1269,6 +1356,7 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (kmap) { /* From 764e16a30a77a9c8346fbae6615e7c818ce9d00f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 25 Aug 2011 10:17:55 -0600 Subject: [PATCH 06/12] perf record: Create events initially disabled and enable after init perf-record currently creates events enabled. When doing a system wide collection (-a arg) this causes data collection for perf's initialization activities -- eg., perf_event__synthesize_threads(). For some events (e.g., context switch S/W event or tracepoints like syscalls) perf's initialization causes a lot of events to be captured frequently generating "Check IO/CPU overload!" warnings on larger systems (e.g., 2 socket, quad core, hyperthreading). perf's initialization phase can be skipped by creating events disabled and then enabling them once the initialization is done. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1314289075-14706-1-git-send-email-dsahern@gmail.com Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 3 +++ tools/perf/util/evlist.c | 13 +++++++++++++ tools/perf/util/evlist.h | 1 + 3 files changed, 17 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6b0519f885e4..f4c3fbee4bad 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -161,6 +161,7 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist) struct perf_event_attr *attr = &evsel->attr; int track = !evsel->idx; /* only the first counter needs these */ + attr->disabled = 1; attr->inherit = !no_inherit; attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | @@ -671,6 +672,8 @@ static int __cmd_record(int argc, const char **argv) } } + perf_evlist__enable(evsel_list); + /* * Let the child rip */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c12bd476c6f7..72e9f4886b6d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -113,6 +113,19 @@ void perf_evlist__disable(struct perf_evlist *evlist) } } +void perf_evlist__enable(struct perf_evlist *evlist) +{ + int cpu, thread; + struct perf_evsel *pos; + + for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { + list_for_each_entry(pos, &evlist->entries, node) { + for (thread = 0; thread < evlist->threads->nr; thread++) + ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); + } + } +} + int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index ce85ae9ae57a..f34915002745 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -54,6 +54,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); void perf_evlist__disable(struct perf_evlist *evlist); +void perf_evlist__enable(struct perf_evlist *evlist); static inline void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, From 6a0e55d85babfccfd976703852ec8bab388b3a10 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 30 Aug 2011 09:15:06 +1000 Subject: [PATCH 07/12] perf symbols: Synthesize anonymous mmap events perf_event__synthesize_mmap_events does not create anonymous mmap events even though the kernel does. As a result an already running application with dynamically created code will not get profiled - all samples end up in the unknown bucket. This patch skips any entries with '[' in the name to avoid adding events for special regions (eg the vsyscall page). All other executable mmaps are assumed to be anonymous and an event is synthesized. Acked-by: Pekka Enberg Cc: Eric B Munson Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Pekka Enberg Link: http://lkml.kernel.org/r/20110830091506.60b51fe8@kryten Signed-off-by: Anton Blanchard Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3c1b8a632101..437f8ca679a0 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -169,12 +169,17 @@ static int perf_event__synthesize_mmap_events(union perf_event *event, continue; pbf += n + 3; if (*pbf == 'x') { /* vm_exec */ + char anonstr[] = "//anon\n"; char *execname = strchr(bf, '/'); /* Catch VDSO */ if (execname == NULL) execname = strstr(bf, "[vdso]"); + /* Catch anonymous mmaps */ + if ((execname == NULL) && !strstr(bf, "[")) + execname = anonstr; + if (execname == NULL) continue; From 6bb8f311a870d6042e0310309eb3d607ae52fe3e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 31 Aug 2011 11:51:45 +1000 Subject: [PATCH 08/12] perf sort: Fix symbol sort output by separating unresolved samples by type I took a profile that suggested 60% of total CPU time was in the hypervisor: ... 60.20% [H] 0x33d43c 4.43% [k] ._spin_lock_irqsave 1.07% [k] ._spin_lock Using perf stat to get the user/kernel/hypervisor breakdown contradicted this. The problem is we merge all unresolved samples into the one unknown bucket. If add a comparison by sample type to sort__sym_cmp we get the real picture: ... 57.11% [.] 0x80fbf63c 4.43% [k] ._spin_lock_irqsave 1.07% [k] ._spin_lock 0.65% [H] 0x33d43c So it was almost all userspace, not hypervisor as the initial profile suggested. I found another issue while adding this. Symbol sorting sometimes shows multiple entries for the unknown bucket: ... 16.65% [.] 0x6cd3a8 7.25% [.] 0x422460 5.37% [.] yylex 4.79% [.] malloc 4.78% [.] _int_malloc 4.03% [.] _int_free 3.95% [.] hash_source_code_string 2.82% [.] 0x532908 2.64% [.] 0x36b538 0.94% [H] 0x8000000000e132a4 0.82% [H] 0x800000000000e8b0 This happens because we aren't consistent with our sorting. On one hand we check to see if both symbols match and for two unresolved samples sym is NULL so we match: if (left->ms.sym == right->ms.sym) return 0; On the other hand we use sample IP for unresolved samples when comparing against a symbol: ip_l = left->ms.sym ? left->ms.sym->start : left->ip; ip_r = right->ms.sym ? right->ms.sym->start : right->ip; This means unresolved samples end up spread across the rbtree and we can't merge them all. If we use cmp_null all unresolved samples will end up in the one bucket and the output makes more sense: ... 39.12% [.] 0x36b538 5.37% [.] yylex 4.79% [.] malloc 4.78% [.] _int_malloc 4.03% [.] _int_free 3.95% [.] hash_source_code_string 2.26% [H] 0x800000000000e8b0 Acked-by: Eric B Munson Cc: Eric B Munson Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ian Munsie Link: http://lkml.kernel.org/r/20110831115145.4f598ab2@kryten Signed-off-by: Anton Blanchard Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 401e220566fd..1ee8f1e40f18 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -151,11 +151,17 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { u64 ip_l, ip_r; + if (!left->ms.sym && !right->ms.sym) + return right->level - left->level; + + if (!left->ms.sym || !right->ms.sym) + return cmp_null(left->ms.sym, right->ms.sym); + if (left->ms.sym == right->ms.sym) return 0; - ip_l = left->ms.sym ? left->ms.sym->start : left->ip; - ip_r = right->ms.sym ? right->ms.sym->start : right->ip; + ip_l = left->ms.sym->start; + ip_r = right->ms.sym->start; return (int64_t)(ip_r - ip_l); } From 936be50306a92356367f330ef9d44f1f62478d22 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 6 Sep 2011 09:12:26 -0600 Subject: [PATCH 09/12] perf tool: Fix endianness handling of u32 data in samples Currently, analyzing PPC data files on x86 the cpu field is always 0 and the tid and pid are backwards. For example, analyzing a PPC file on PPC the pid/tid fields show: rsyslogd 1210/1212 and analyzing the same PPC file using an x86 perf binary shows: rsyslogd 1212/1210 The problem is that the swap_op method for samples is perf_event__all64_swap which assumes all elements in the sample_data struct are u64s. cpu, tid and pid are u32s and need to be handled individually. Given that the swap is done before the sample is parsed, the simplest solution is to undo the 64-bit swap of those elements when the sample is parsed and do the proper swap. The RAW data field is generic and perf cannot have programmatic knowledge of how to treat that data. Instead a warning is given to the user. Thanks to Anton Blanchard for providing a data file for a mult-CPU PPC system so I could verify the fix for the CPU fields. v3 -> v4: - fixed use of WARN_ONCE v2 -> v3: - used WARN_ONCE for message regarding raw data - removed struct wrapper around union - fixed whitespace issues v1 -> v2: - added a union for undoing the byte-swap on u64 and redoing swap on u32's to address compiler errors (see git commit 65014ab3) Cc: Anton Blanchard Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1315321946-16993-1-git-send-email-dsahern@gmail.com Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 2 +- tools/perf/util/event.h | 2 +- tools/perf/util/evsel.c | 54 +++++++++++++++++++++++++++++++-------- tools/perf/util/session.h | 3 ++- 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 55f4c76f2821..efe696f936e2 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -561,7 +561,7 @@ static int test__basic_mmap(void) } err = perf_event__parse_sample(event, attr.sample_type, sample_size, - false, &sample); + false, &sample, false); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_munmap; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1d7f66488a88..357a85b85248 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -186,6 +186,6 @@ const char *perf_event__name(unsigned int id); int perf_event__parse_sample(const union perf_event *event, u64 type, int sample_size, bool sample_id_all, - struct perf_sample *sample); + struct perf_sample *sample, bool swapped); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a03a36b7908a..c5748c52318f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -7,6 +7,8 @@ * Released under the GPL v2. (and only v2, not any later version) */ +#include +#include "asm/bug.h" #include "evsel.h" #include "evlist.h" #include "util.h" @@ -342,10 +344,20 @@ static bool sample_overlap(const union perf_event *event, int perf_event__parse_sample(const union perf_event *event, u64 type, int sample_size, bool sample_id_all, - struct perf_sample *data) + struct perf_sample *data, bool swapped) { const u64 *array; + /* + * used for cross-endian analysis. See git commit 65014ab3 + * for why this goofiness is needed. + */ + union { + u64 val64; + u32 val32[2]; + } u; + + data->cpu = data->pid = data->tid = -1; data->stream_id = data->id = data->time = -1ULL; @@ -366,9 +378,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_TID) { - u32 *p = (u32 *)array; - data->pid = p[0]; - data->tid = p[1]; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } + + data->pid = u.val32[0]; + data->tid = u.val32[1]; array++; } @@ -395,8 +414,15 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - data->cpu = *p; + + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + } + + data->cpu = u.val32[0]; array++; } @@ -423,18 +449,24 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_RAW) { - u32 *p = (u32 *)array; + u.val64 = *array; + if (WARN_ONCE(swapped, + "Endianness of raw data not corrected!\n")) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } if (sample_overlap(event, array, sizeof(u32))) return -EFAULT; - data->raw_size = *p; - p++; + data->raw_size = u.val32[0]; - if (sample_overlap(event, p, data->raw_size)) + if (sample_overlap(event, &u.val32[1], data->raw_size)) return -EFAULT; - data->raw_data = p; + data->raw_data = &u.val32[1]; } return 0; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 170601e67d6b..974d0cbee5e9 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -162,7 +162,8 @@ static inline int perf_session__parse_sample(struct perf_session *session, { return perf_event__parse_sample(event, session->sample_type, session->sample_size, - session->sample_id_all, sample); + session->sample_id_all, sample, + session->header.needs_swap); } struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, From be96ea8ffa788dccb1ba895cced29db6687c4911 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 22 Oct 2010 17:25:01 +0200 Subject: [PATCH 10/12] perf symbols: Fix issue with binaries using 16-bytes buildids (v2) Buildid can vary in size. According to the man page of ld, buildid can be 160 bits (sha1) or 128 bits (md5, uuid). Perf assumes buildid size of 20 bytes (160 bits) regardless. When dealing with md5 buildids, it would thus read more than needed and that would cause mismatches and samples without symbols. This patch fixes this by taking into account the actual buildid size as encoded int he section header. The leftover bytes are also cleared. This second version fixes a minor issue with the memset() base position. Cc: David S. Miller Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: http://lkml.kernel.org/r/4cc1af3c.8ee7d80a.5a28.ffff868e@mx.google.com Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 870b3b8c649b..40eeaf07725b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1170,8 +1170,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; - if (elf_read_build_id(elf, build_id, - BUILD_ID_SIZE) != BUILD_ID_SIZE) + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) goto out_elf_end; if (!dso__build_id_equal(dso, build_id)) @@ -1443,8 +1442,8 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) ptr = data->d_buf; while (ptr < (data->d_buf + data->d_size)) { GElf_Nhdr *nhdr = ptr; - int namesz = NOTE_ALIGN(nhdr->n_namesz), - descsz = NOTE_ALIGN(nhdr->n_descsz); + size_t namesz = NOTE_ALIGN(nhdr->n_namesz), + descsz = NOTE_ALIGN(nhdr->n_descsz); const char *name; ptr += sizeof(*nhdr); @@ -1453,8 +1452,10 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) if (nhdr->n_type == NT_GNU_BUILD_ID && nhdr->n_namesz == sizeof("GNU")) { if (memcmp(name, "GNU", sizeof("GNU")) == 0) { - memcpy(bf, ptr, BUILD_ID_SIZE); - err = BUILD_ID_SIZE; + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + err = descsz; break; } } @@ -1506,7 +1507,7 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) while (1) { char bf[BUFSIZ]; GElf_Nhdr nhdr; - int namesz, descsz; + size_t namesz, descsz; if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) break; @@ -1515,15 +1516,16 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) descsz = NOTE_ALIGN(nhdr.n_descsz); if (nhdr.n_type == NT_GNU_BUILD_ID && nhdr.n_namesz == sizeof("GNU")) { - if (read(fd, bf, namesz) != namesz) + if (read(fd, bf, namesz) != (ssize_t)namesz) break; if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { - if (read(fd, build_id, - BUILD_ID_SIZE) == BUILD_ID_SIZE) { + size_t sz = min(descsz, size); + if (read(fd, build_id, sz) == (ssize_t)sz) { + memset(build_id + sz, 0, size - sz); err = 0; break; } - } else if (read(fd, bf, descsz) != descsz) + } else if (read(fd, bf, descsz) != (ssize_t)descsz) break; } else { int n = namesz + descsz; From af52aafad26fe83edc3ff95d6f630c2fc98a0c4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Sep 2011 15:54:30 -0300 Subject: [PATCH 11/12] perf top: Fix userspace sample addr map offset The 'perf top' tool came from the kernel where we had each DSO (vmlinux, modules) loaded just once at a time. But userspace may have DSOs loaded in multiple addresses (shared libraries), requiring that we use the just resolved map instead of the first one found. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ag53wz0yllpgers0n2w7hchp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a43433f08300..d28013b7d61c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -191,7 +191,8 @@ static void __zero_source_counters(struct sym_entry *syme) symbol__annotate_zero_histograms(sym); } -static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) +static void record_precise_ip(struct sym_entry *syme, struct map *map, + int counter, u64 ip) { struct annotation *notes; struct symbol *sym; @@ -205,8 +206,8 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) if (pthread_mutex_trylock(¬es->lock)) return; - ip = syme->map->map_ip(syme->map, ip); - symbol__inc_addr_samples(sym, syme->map, counter, ip); + ip = map->map_ip(map, ip); + symbol__inc_addr_samples(sym, map, counter, ip); pthread_mutex_unlock(¬es->lock); } @@ -810,7 +811,7 @@ static void perf_event__process_sample(const union perf_event *event, evsel = perf_evlist__id2evsel(top.evlist, sample->id); assert(evsel != NULL); syme->count[evsel->idx]++; - record_precise_ip(syme, evsel->idx, ip); + record_precise_ip(syme, al.map, evsel->idx, ip); pthread_mutex_lock(&top.active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) { static bool first = true; From 9e59e0995a57048c53773f1de99c6637ad12dd25 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Thu, 8 Sep 2011 13:42:39 -0700 Subject: [PATCH 12/12] perf tools: Add support for disabling -Werror via WERROR=0 GCC often introduces new warnings with lots of false positives - breaking -Werror builds. WERROR=0 allows one to build perf without much fuss - while still encouraging people to send patches to avoid the fuss of having to type WERROR=0. Bisecting back to commits that produce a (mostly harmless) warning on some compilers is more difficult. With WERROR=0 one could bisect without worrying about harmless warnings. Cc: Ingo Molnar Link: http://lkml.kernel.org/r/eac06c7cc4920e5d4830417d466161fb26c7359c.1315514559.git.dvhart@linux.intel.com Signed-off-by: Darren Hart Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3b8f7b80376b..e9d5c271db69 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -30,6 +30,8 @@ endif # Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. # # Define NO_DWARF if you do not want debug-info analysis feature at all. +# +# Define WERROR=0 to disable treating any warnings as errors. $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @@ -63,6 +65,11 @@ ifeq ($(ARCH),x86_64) endif endif +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS_WERROR := -Werror +endif + # # Include saner warnings here, which can catch bugs: # @@ -95,7 +102,7 @@ ifndef PERF_DEBUG CFLAGS_OPTIMIZE = -O6 endif -CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 ALL_LDFLAGS = $(LDFLAGS)