diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e96923310d57..46e0c32ad23b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -397,7 +397,9 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o LIB_OBJS += $(OUTPUT)tests/pmu.o +LIB_OBJS += $(OUTPUT)tests/hists_common.o LIB_OBJS += $(OUTPUT)tests/hists_link.o +LIB_OBJS += $(OUTPUT)tests/hists_filter.o LIB_OBJS += $(OUTPUT)tests/python-use.o LIB_OBJS += $(OUTPUT)tests/bp_signal.o LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o @@ -414,6 +416,8 @@ ifeq ($(ARCH),x86) LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o endif endif +LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o +LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index b8c0102c70c8..c916656259f7 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -23,7 +23,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_X86_SP]; - map = map_groups__find(&thread->mg, MAP__FUNCTION, (u64) sp); + map = map_groups__find(thread->mg, MAP__FUNCTION, (u64) sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 5c11ecad02a9..ebdad3376c67 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -15,6 +15,9 @@ #ifndef __NR_futex # define __NR_futex 240 #endif +#ifndef __NR_gettid +# define __NR_gettid 224 +#endif #endif #if defined(__x86_64__) @@ -29,6 +32,9 @@ #ifndef __NR_futex # define __NR_futex 202 #endif +#ifndef __NR_gettid +# define __NR_gettid 186 +#endif #endif #ifdef __powerpc__ diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index b11bf8a08430..0d5afaf72944 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -123,6 +123,18 @@ static struct test { }, #endif #endif + { + .desc = "Test filtering hist entries", + .func = test__hists_filter, + }, + { + .desc = "Test mmap thread lookup", + .func = test__mmap_thread_lookup, + }, + { + .desc = "Test thread mg sharing", + .func = test__thread_mg_share, + }, { .func = NULL, }, diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c new file mode 100644 index 000000000000..44655b395bb9 --- /dev/null +++ b/tools/perf/tests/hists_common.c @@ -0,0 +1,148 @@ +#include "perf.h" +#include "util/debug.h" +#include "util/symbol.h" +#include "util/sort.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/machine.h" +#include "util/thread.h" +#include "tests/hists_common.h" + +static struct { + u32 pid; + const char *comm; +} fake_threads[] = { + { 100, "perf" }, + { 200, "perf" }, + { 300, "bash" }, +}; + +static struct { + u32 pid; + u64 start; + const char *filename; +} fake_mmap_info[] = { + { 100, 0x40000, "perf" }, + { 100, 0x50000, "libc" }, + { 100, 0xf0000, "[kernel]" }, + { 200, 0x40000, "perf" }, + { 200, 0x50000, "libc" }, + { 200, 0xf0000, "[kernel]" }, + { 300, 0x40000, "bash" }, + { 300, 0x50000, "libc" }, + { 300, 0xf0000, "[kernel]" }, +}; + +struct fake_sym { + u64 start; + u64 length; + const char *name; +}; + +static struct fake_sym perf_syms[] = { + { 700, 100, "main" }, + { 800, 100, "run_command" }, + { 900, 100, "cmd_record" }, +}; + +static struct fake_sym bash_syms[] = { + { 700, 100, "main" }, + { 800, 100, "xmalloc" }, + { 900, 100, "xfree" }, +}; + +static struct fake_sym libc_syms[] = { + { 700, 100, "malloc" }, + { 800, 100, "free" }, + { 900, 100, "realloc" }, +}; + +static struct fake_sym kernel_syms[] = { + { 700, 100, "schedule" }, + { 800, 100, "page_fault" }, + { 900, 100, "sys_perf_event_open" }, +}; + +static struct { + const char *dso_name; + struct fake_sym *syms; + size_t nr_syms; +} fake_symbols[] = { + { "perf", perf_syms, ARRAY_SIZE(perf_syms) }, + { "bash", bash_syms, ARRAY_SIZE(bash_syms) }, + { "libc", libc_syms, ARRAY_SIZE(libc_syms) }, + { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) }, +}; + +struct machine *setup_fake_machine(struct machines *machines) +{ + struct machine *machine = machines__find(machines, HOST_KERNEL_ID); + size_t i; + + if (machine == NULL) { + pr_debug("Not enough memory for machine setup\n"); + return NULL; + } + + for (i = 0; i < ARRAY_SIZE(fake_threads); i++) { + struct thread *thread; + + thread = machine__findnew_thread(machine, fake_threads[i].pid, + fake_threads[i].pid); + if (thread == NULL) + goto out; + + thread__set_comm(thread, fake_threads[i].comm, 0); + } + + for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { + union perf_event fake_mmap_event = { + .mmap = { + .header = { .misc = PERF_RECORD_MISC_USER, }, + .pid = fake_mmap_info[i].pid, + .tid = fake_mmap_info[i].pid, + .start = fake_mmap_info[i].start, + .len = 0x1000ULL, + .pgoff = 0ULL, + }, + }; + + strcpy(fake_mmap_event.mmap.filename, + fake_mmap_info[i].filename); + + machine__process_mmap_event(machine, &fake_mmap_event, NULL); + } + + for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) { + size_t k; + struct dso *dso; + + dso = __dsos__findnew(&machine->user_dsos, + fake_symbols[i].dso_name); + if (dso == NULL) + goto out; + + /* emulate dso__load() */ + dso__set_loaded(dso, MAP__FUNCTION); + + for (k = 0; k < fake_symbols[i].nr_syms; k++) { + struct symbol *sym; + struct fake_sym *fsym = &fake_symbols[i].syms[k]; + + sym = symbol__new(fsym->start, fsym->length, + STB_GLOBAL, fsym->name); + if (sym == NULL) + goto out; + + symbols__insert(&dso->symbols[MAP__FUNCTION], sym); + } + } + + return machine; + +out: + pr_debug("Not enough memory for machine setup\n"); + machine__delete_threads(machine); + machine__delete(machine); + return NULL; +} diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h new file mode 100644 index 000000000000..2528b8fc105a --- /dev/null +++ b/tools/perf/tests/hists_common.h @@ -0,0 +1,44 @@ +#ifndef __PERF_TESTS__HISTS_COMMON_H__ +#define __PERF_TESTS__HISTS_COMMON_H__ + +struct machine; +struct machines; + +/* + * The setup_fake_machine() provides a test environment which consists + * of 3 processes that have 3 mappings and in turn, have 3 symbols + * respectively. See below table: + * + * Command: Pid Shared Object Symbol + * ............. ............. ................... + * perf: 100 perf main + * perf: 100 perf run_command + * perf: 100 perf comd_record + * perf: 100 libc malloc + * perf: 100 libc free + * perf: 100 libc realloc + * perf: 100 [kernel] schedule + * perf: 100 [kernel] page_fault + * perf: 100 [kernel] sys_perf_event_open + * perf: 200 perf main + * perf: 200 perf run_command + * perf: 200 perf comd_record + * perf: 200 libc malloc + * perf: 200 libc free + * perf: 200 libc realloc + * perf: 200 [kernel] schedule + * perf: 200 [kernel] page_fault + * perf: 200 [kernel] sys_perf_event_open + * bash: 300 bash main + * bash: 300 bash xmalloc + * bash: 300 bash xfree + * bash: 300 libc malloc + * bash: 300 libc free + * bash: 300 libc realloc + * bash: 300 [kernel] schedule + * bash: 300 [kernel] page_fault + * bash: 300 [kernel] sys_perf_event_open + */ +struct machine *setup_fake_machine(struct machines *machines); + +#endif /* __PERF_TESTS__HISTS_COMMON_H__ */ diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c new file mode 100644 index 000000000000..23dc2f4d12c3 --- /dev/null +++ b/tools/perf/tests/hists_filter.c @@ -0,0 +1,315 @@ +#include "perf.h" +#include "util/debug.h" +#include "util/symbol.h" +#include "util/sort.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/machine.h" +#include "util/thread.h" +#include "util/parse-events.h" +#include "tests/tests.h" +#include "tests/hists_common.h" + +struct sample { + u32 pid; + u64 ip; + struct thread *thread; + struct map *map; + struct symbol *sym; +}; + +/* For the numbers, see hists_common.c */ +static struct sample fake_samples[] = { + /* perf [kernel] schedule() */ + { .pid = 100, .ip = 0xf0000 + 700, }, + /* perf [perf] main() */ + { .pid = 100, .ip = 0x40000 + 700, }, + /* perf [libc] malloc() */ + { .pid = 100, .ip = 0x50000 + 700, }, + /* perf [perf] main() */ + { .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */ + /* perf [perf] cmd_record() */ + { .pid = 200, .ip = 0x40000 + 900, }, + /* perf [kernel] page_fault() */ + { .pid = 200, .ip = 0xf0000 + 800, }, + /* bash [bash] main() */ + { .pid = 300, .ip = 0x40000 + 700, }, + /* bash [bash] xmalloc() */ + { .pid = 300, .ip = 0x40000 + 800, }, + /* bash [libc] malloc() */ + { .pid = 300, .ip = 0x50000 + 700, }, + /* bash [kernel] page_fault() */ + { .pid = 300, .ip = 0xf0000 + 800, }, +}; + +static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) +{ + struct perf_evsel *evsel; + struct addr_location al; + struct hist_entry *he; + struct perf_sample sample = { .cpu = 0, }; + size_t i; + + /* + * each evsel will have 10 samples but the 4th sample + * (perf [perf] main) will be collapsed to an existing entry + * so total 9 entries will be in the tree. + */ + evlist__for_each(evlist, evsel) { + for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { + const union perf_event event = { + .header = { + .misc = PERF_RECORD_MISC_USER, + }, + }; + + /* make sure it has no filter at first */ + evsel->hists.thread_filter = NULL; + evsel->hists.dso_filter = NULL; + evsel->hists.symbol_filter_str = NULL; + + sample.pid = fake_samples[i].pid; + sample.ip = fake_samples[i].ip; + + if (perf_event__preprocess_sample(&event, machine, &al, + &sample) < 0) + goto out; + + he = __hists__add_entry(&evsel->hists, &al, NULL, + NULL, NULL, 100, 1, 0); + if (he == NULL) + goto out; + + fake_samples[i].thread = al.thread; + fake_samples[i].map = al.map; + fake_samples[i].sym = al.sym; + + hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); + if (!he->filtered) + he->hists->stats.nr_non_filtered_samples++; + } + } + + return 0; + +out: + pr_debug("Not enough memory for adding a hist entry\n"); + return TEST_FAIL; +} + +static void print_hists(struct hists *hists) +{ + int i = 0; + struct rb_root *root; + struct rb_node *node; + + root = &hists->entries; + + pr_info("----- %s --------\n", __func__); + node = rb_first(root); + while (node) { + struct hist_entry *he; + + he = rb_entry(node, struct hist_entry, rb_node); + + if (!he->filtered) { + pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n", + i, thread__comm_str(he->thread), + he->ms.map->dso->short_name, + he->ms.sym->name, he->stat.period); + } + + i++; + node = rb_next(node); + } +} + +int test__hists_filter(void) +{ + int err = TEST_FAIL; + struct machines machines; + struct machine *machine; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(); + + TEST_ASSERT_VAL("No memory", evlist); + + err = parse_events(evlist, "cpu-clock"); + if (err) + goto out; + err = parse_events(evlist, "task-clock"); + if (err) + goto out; + + /* default sort order (comm,dso,sym) will be used */ + if (setup_sorting() < 0) + goto out; + + machines__init(&machines); + + /* setup threads/dso/map/symbols also */ + machine = setup_fake_machine(&machines); + if (!machine) + goto out; + + if (verbose > 1) + machine__fprintf(machine, stderr); + + /* process sample events */ + err = add_hist_entries(evlist, machine); + if (err < 0) + goto out; + + evlist__for_each(evlist, evsel) { + struct hists *hists = &evsel->hists; + + hists__collapse_resort(hists, NULL); + hists__output_resort(hists); + + if (verbose > 2) { + pr_info("Normal histogram\n"); + print_hists(hists); + } + + TEST_ASSERT_VAL("Invalid nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + TEST_ASSERT_VAL("Invalid nr hist entries", + hists->nr_entries == 9); + TEST_ASSERT_VAL("Invalid total period", + hists->stats.total_period == 1000); + TEST_ASSERT_VAL("Unmatched nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == + hists->stats.nr_non_filtered_samples); + TEST_ASSERT_VAL("Unmatched nr hist entries", + hists->nr_entries == hists->nr_non_filtered_entries); + TEST_ASSERT_VAL("Unmatched total period", + hists->stats.total_period == + hists->stats.total_non_filtered_period); + + /* now applying thread filter for 'bash' */ + evsel->hists.thread_filter = fake_samples[9].thread; + hists__filter_by_thread(hists); + + if (verbose > 2) { + pr_info("Histogram for thread filter\n"); + print_hists(hists); + } + + /* normal stats should be invariant */ + TEST_ASSERT_VAL("Invalid nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + TEST_ASSERT_VAL("Invalid nr hist entries", + hists->nr_entries == 9); + TEST_ASSERT_VAL("Invalid total period", + hists->stats.total_period == 1000); + + /* but filter stats are changed */ + TEST_ASSERT_VAL("Unmatched nr samples for thread filter", + hists->stats.nr_non_filtered_samples == 4); + TEST_ASSERT_VAL("Unmatched nr hist entries for thread filter", + hists->nr_non_filtered_entries == 4); + TEST_ASSERT_VAL("Unmatched total period for thread filter", + hists->stats.total_non_filtered_period == 400); + + /* remove thread filter first */ + evsel->hists.thread_filter = NULL; + hists__filter_by_thread(hists); + + /* now applying dso filter for 'kernel' */ + evsel->hists.dso_filter = fake_samples[0].map->dso; + hists__filter_by_dso(hists); + + if (verbose > 2) { + pr_info("Histogram for dso filter\n"); + print_hists(hists); + } + + /* normal stats should be invariant */ + TEST_ASSERT_VAL("Invalid nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + TEST_ASSERT_VAL("Invalid nr hist entries", + hists->nr_entries == 9); + TEST_ASSERT_VAL("Invalid total period", + hists->stats.total_period == 1000); + + /* but filter stats are changed */ + TEST_ASSERT_VAL("Unmatched nr samples for dso filter", + hists->stats.nr_non_filtered_samples == 3); + TEST_ASSERT_VAL("Unmatched nr hist entries for dso filter", + hists->nr_non_filtered_entries == 3); + TEST_ASSERT_VAL("Unmatched total period for dso filter", + hists->stats.total_non_filtered_period == 300); + + /* remove dso filter first */ + evsel->hists.dso_filter = NULL; + hists__filter_by_dso(hists); + + /* + * now applying symbol filter for 'main'. Also note that + * there's 3 samples that have 'main' symbol but the 4th + * entry of fake_samples was collapsed already so it won't + * be counted as a separate entry but the sample count and + * total period will be remained. + */ + evsel->hists.symbol_filter_str = "main"; + hists__filter_by_symbol(hists); + + if (verbose > 2) { + pr_info("Histogram for symbol filter\n"); + print_hists(hists); + } + + /* normal stats should be invariant */ + TEST_ASSERT_VAL("Invalid nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + TEST_ASSERT_VAL("Invalid nr hist entries", + hists->nr_entries == 9); + TEST_ASSERT_VAL("Invalid total period", + hists->stats.total_period == 1000); + + /* but filter stats are changed */ + TEST_ASSERT_VAL("Unmatched nr samples for symbol filter", + hists->stats.nr_non_filtered_samples == 3); + TEST_ASSERT_VAL("Unmatched nr hist entries for symbol filter", + hists->nr_non_filtered_entries == 2); + TEST_ASSERT_VAL("Unmatched total period for symbol filter", + hists->stats.total_non_filtered_period == 300); + + /* now applying all filters at once. */ + evsel->hists.thread_filter = fake_samples[1].thread; + evsel->hists.dso_filter = fake_samples[1].map->dso; + hists__filter_by_thread(hists); + hists__filter_by_dso(hists); + + if (verbose > 2) { + pr_info("Histogram for all filters\n"); + print_hists(hists); + } + + /* normal stats should be invariant */ + TEST_ASSERT_VAL("Invalid nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + TEST_ASSERT_VAL("Invalid nr hist entries", + hists->nr_entries == 9); + TEST_ASSERT_VAL("Invalid total period", + hists->stats.total_period == 1000); + + /* but filter stats are changed */ + TEST_ASSERT_VAL("Unmatched nr samples for all filter", + hists->stats.nr_non_filtered_samples == 2); + TEST_ASSERT_VAL("Unmatched nr hist entries for all filter", + hists->nr_non_filtered_entries == 1); + TEST_ASSERT_VAL("Unmatched total period for all filter", + hists->stats.total_non_filtered_period == 200); + } + + + err = TEST_OK; + +out: + /* tear down everything */ + perf_evlist__delete(evlist); + machines__exit(&machines); + + return err; +} diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 7ccbc7b6ae77..e42d6790811a 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -8,145 +8,7 @@ #include "machine.h" #include "thread.h" #include "parse-events.h" - -static struct { - u32 pid; - const char *comm; -} fake_threads[] = { - { 100, "perf" }, - { 200, "perf" }, - { 300, "bash" }, -}; - -static struct { - u32 pid; - u64 start; - const char *filename; -} fake_mmap_info[] = { - { 100, 0x40000, "perf" }, - { 100, 0x50000, "libc" }, - { 100, 0xf0000, "[kernel]" }, - { 200, 0x40000, "perf" }, - { 200, 0x50000, "libc" }, - { 200, 0xf0000, "[kernel]" }, - { 300, 0x40000, "bash" }, - { 300, 0x50000, "libc" }, - { 300, 0xf0000, "[kernel]" }, -}; - -struct fake_sym { - u64 start; - u64 length; - const char *name; -}; - -static struct fake_sym perf_syms[] = { - { 700, 100, "main" }, - { 800, 100, "run_command" }, - { 900, 100, "cmd_record" }, -}; - -static struct fake_sym bash_syms[] = { - { 700, 100, "main" }, - { 800, 100, "xmalloc" }, - { 900, 100, "xfree" }, -}; - -static struct fake_sym libc_syms[] = { - { 700, 100, "malloc" }, - { 800, 100, "free" }, - { 900, 100, "realloc" }, -}; - -static struct fake_sym kernel_syms[] = { - { 700, 100, "schedule" }, - { 800, 100, "page_fault" }, - { 900, 100, "sys_perf_event_open" }, -}; - -static struct { - const char *dso_name; - struct fake_sym *syms; - size_t nr_syms; -} fake_symbols[] = { - { "perf", perf_syms, ARRAY_SIZE(perf_syms) }, - { "bash", bash_syms, ARRAY_SIZE(bash_syms) }, - { "libc", libc_syms, ARRAY_SIZE(libc_syms) }, - { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) }, -}; - -static struct machine *setup_fake_machine(struct machines *machines) -{ - struct machine *machine = machines__find(machines, HOST_KERNEL_ID); - size_t i; - - if (machine == NULL) { - pr_debug("Not enough memory for machine setup\n"); - return NULL; - } - - for (i = 0; i < ARRAY_SIZE(fake_threads); i++) { - struct thread *thread; - - thread = machine__findnew_thread(machine, fake_threads[i].pid, - fake_threads[i].pid); - if (thread == NULL) - goto out; - - thread__set_comm(thread, fake_threads[i].comm, 0); - } - - for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { - union perf_event fake_mmap_event = { - .mmap = { - .header = { .misc = PERF_RECORD_MISC_USER, }, - .pid = fake_mmap_info[i].pid, - .tid = fake_mmap_info[i].pid, - .start = fake_mmap_info[i].start, - .len = 0x1000ULL, - .pgoff = 0ULL, - }, - }; - - strcpy(fake_mmap_event.mmap.filename, - fake_mmap_info[i].filename); - - machine__process_mmap_event(machine, &fake_mmap_event, NULL); - } - - for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) { - size_t k; - struct dso *dso; - - dso = __dsos__findnew(&machine->user_dsos, - fake_symbols[i].dso_name); - if (dso == NULL) - goto out; - - /* emulate dso__load() */ - dso__set_loaded(dso, MAP__FUNCTION); - - for (k = 0; k < fake_symbols[i].nr_syms; k++) { - struct symbol *sym; - struct fake_sym *fsym = &fake_symbols[i].syms[k]; - - sym = symbol__new(fsym->start, fsym->length, - STB_GLOBAL, fsym->name); - if (sym == NULL) - goto out; - - symbols__insert(&dso->symbols[MAP__FUNCTION], sym); - } - } - - return machine; - -out: - pr_debug("Not enough memory for machine setup\n"); - machine__delete_threads(machine); - machine__delete(machine); - return NULL; -} +#include "hists_common.h" struct sample { u32 pid; @@ -156,6 +18,7 @@ struct sample { struct symbol *sym; }; +/* For the numbers, see hists_common.c */ static struct sample fake_common_samples[] = { /* perf [kernel] schedule() */ { .pid = 100, .ip = 0xf0000 + 700, }, diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c new file mode 100644 index 000000000000..4a456fef66ca --- /dev/null +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -0,0 +1,233 @@ +#include +#include +#include +#include +#include +#include +#include +#include "debug.h" +#include "tests.h" +#include "machine.h" +#include "thread_map.h" +#include "symbol.h" +#include "thread.h" + +#define THREADS 4 + +static int go_away; + +struct thread_data { + pthread_t pt; + pid_t tid; + void *map; + int ready[2]; +}; + +static struct thread_data threads[THREADS]; + +static int thread_init(struct thread_data *td) +{ + void *map; + + map = mmap(NULL, page_size, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_SHARED|MAP_ANONYMOUS, -1, 0); + + if (map == MAP_FAILED) { + perror("mmap failed"); + return -1; + } + + td->map = map; + td->tid = syscall(SYS_gettid); + + pr_debug("tid = %d, map = %p\n", td->tid, map); + return 0; +} + +static void *thread_fn(void *arg) +{ + struct thread_data *td = arg; + ssize_t ret; + int go; + + if (thread_init(td)) + return NULL; + + /* Signal thread_create thread is initialized. */ + ret = write(td->ready[1], &go, sizeof(int)); + if (ret != sizeof(int)) { + pr_err("failed to notify\n"); + return NULL; + } + + while (!go_away) { + /* Waiting for main thread to kill us. */ + usleep(100); + } + + munmap(td->map, page_size); + return NULL; +} + +static int thread_create(int i) +{ + struct thread_data *td = &threads[i]; + int err, go; + + if (pipe(td->ready)) + return -1; + + err = pthread_create(&td->pt, NULL, thread_fn, td); + if (!err) { + /* Wait for thread initialization. */ + ssize_t ret = read(td->ready[0], &go, sizeof(int)); + err = ret != sizeof(int); + } + + close(td->ready[0]); + close(td->ready[1]); + return err; +} + +static int threads_create(void) +{ + struct thread_data *td0 = &threads[0]; + int i, err = 0; + + go_away = 0; + + /* 0 is main thread */ + if (thread_init(td0)) + return -1; + + for (i = 1; !err && i < THREADS; i++) + err = thread_create(i); + + return err; +} + +static int threads_destroy(void) +{ + struct thread_data *td0 = &threads[0]; + int i, err = 0; + + /* cleanup the main thread */ + munmap(td0->map, page_size); + + go_away = 1; + + for (i = 1; !err && i < THREADS; i++) + err = pthread_join(threads[i].pt, NULL); + + return err; +} + +typedef int (*synth_cb)(struct machine *machine); + +static int synth_all(struct machine *machine) +{ + return perf_event__synthesize_threads(NULL, + perf_event__process, + machine, 0); +} + +static int synth_process(struct machine *machine) +{ + struct thread_map *map; + int err; + + map = thread_map__new_by_pid(getpid()); + + err = perf_event__synthesize_thread_map(NULL, map, + perf_event__process, + machine, 0); + + thread_map__delete(map); + return err; +} + +static int mmap_events(synth_cb synth) +{ + struct machines machines; + struct machine *machine; + int err, i; + + /* + * The threads_create will not return before all threads + * are spawned and all created memory map. + * + * They will loop until threads_destroy is called, so we + * can safely run synthesizing function. + */ + TEST_ASSERT_VAL("failed to create threads", !threads_create()); + + machines__init(&machines); + machine = &machines.host; + + dump_trace = verbose > 1 ? 1 : 0; + + err = synth(machine); + + dump_trace = 0; + + TEST_ASSERT_VAL("failed to destroy threads", !threads_destroy()); + TEST_ASSERT_VAL("failed to synthesize maps", !err); + + /* + * All data is synthesized, try to find map for each + * thread object. + */ + for (i = 0; i < THREADS; i++) { + struct thread_data *td = &threads[i]; + struct addr_location al; + struct thread *thread; + + thread = machine__findnew_thread(machine, getpid(), td->tid); + + pr_debug("looking for map %p\n", td->map); + + thread__find_addr_map(thread, machine, + PERF_RECORD_MISC_USER, MAP__FUNCTION, + (unsigned long) (td->map + 1), &al); + + if (!al.map) { + pr_debug("failed, couldn't find map\n"); + err = -1; + break; + } + + pr_debug("map %p, addr %" PRIx64 "\n", al.map, al.map->start); + } + + machine__delete_threads(machine); + machines__exit(&machines); + return err; +} + +/* + * This test creates 'THREADS' number of threads (including + * main thread) and each thread creates memory map. + * + * When threads are created, we synthesize them with both + * (separate tests): + * perf_event__synthesize_thread_map (process based) + * perf_event__synthesize_threads (global) + * + * We test we can find all memory maps via: + * thread__find_addr_map + * + * by using all thread objects. + */ +int test__mmap_thread_lookup(void) +{ + /* perf_event__synthesize_threads synthesize */ + TEST_ASSERT_VAL("failed with sythesizing all", + !mmap_events(synth_all)); + + /* perf_event__synthesize_thread_map synthesize */ + TEST_ASSERT_VAL("failed with sythesizing process", + !mmap_events(synth_process)); + + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index a24795ca002d..a9d7cb019f9e 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -41,6 +41,9 @@ int test__sample_parsing(void); int test__keep_tracking(void); int test__parse_no_sample_id_all(void); int test__dwarf_unwind(void); +int test__hists_filter(void); +int test__mmap_thread_lookup(void); +int test__thread_mg_share(void); #if defined(__x86_64__) || defined(__i386__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c new file mode 100644 index 000000000000..2b2e0dbe114f --- /dev/null +++ b/tools/perf/tests/thread-mg-share.c @@ -0,0 +1,90 @@ +#include "tests.h" +#include "machine.h" +#include "thread.h" +#include "map.h" + +int test__thread_mg_share(void) +{ + struct machines machines; + struct machine *machine; + + /* thread group */ + struct thread *leader; + struct thread *t1, *t2, *t3; + struct map_groups *mg; + + /* other process */ + struct thread *other, *other_leader; + struct map_groups *other_mg; + + /* + * This test create 2 processes abstractions (struct thread) + * with several threads and checks they properly share and + * maintain map groups info (struct map_groups). + * + * thread group (pid: 0, tids: 0, 1, 2, 3) + * other group (pid: 4, tids: 4, 5) + */ + + machines__init(&machines); + machine = &machines.host; + + /* create process with 4 threads */ + leader = machine__findnew_thread(machine, 0, 0); + t1 = machine__findnew_thread(machine, 0, 1); + t2 = machine__findnew_thread(machine, 0, 2); + t3 = machine__findnew_thread(machine, 0, 3); + + /* and create 1 separated process, without thread leader */ + other = machine__findnew_thread(machine, 4, 5); + + TEST_ASSERT_VAL("failed to create threads", + leader && t1 && t2 && t3 && other); + + mg = leader->mg; + TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 4); + + /* test the map groups pointer is shared */ + TEST_ASSERT_VAL("map groups don't match", mg == t1->mg); + TEST_ASSERT_VAL("map groups don't match", mg == t2->mg); + TEST_ASSERT_VAL("map groups don't match", mg == t3->mg); + + /* + * Verify the other leader was created by previous call. + * It should have shared map groups with no change in + * refcnt. + */ + other_leader = machine__find_thread(machine, 4, 4); + TEST_ASSERT_VAL("failed to find other leader", other_leader); + + other_mg = other->mg; + TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 2); + + TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); + + /* release thread group */ + thread__delete(leader); + TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3); + + thread__delete(t1); + TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2); + + thread__delete(t2); + TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1); + + thread__delete(t3); + + /* release other group */ + thread__delete(other_leader); + TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1); + + thread__delete(other); + + /* + * Cannot call machine__delete_threads(machine) now, + * because we've already released all the threads. + */ + + machines__exit(&machines); + return 0; +} diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index d59893edf031..9eccf7f4f367 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -495,7 +495,7 @@ print_entries: break; if (h->ms.map == NULL && verbose > 1) { - __map_groups__fprintf_maps(&h->thread->mg, + __map_groups__fprintf_maps(h->thread->mg, MAP__FUNCTION, verbose, fp); fprintf(fp, "%.10s end\n", graph_dotted_line); } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 9d12aa6dd485..dbcaea1a8180 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -699,7 +699,7 @@ void thread__find_addr_map(struct thread *thread, enum map_type type, u64 addr, struct addr_location *al) { - struct map_groups *mg = &thread->mg; + struct map_groups *mg = thread->mg; bool load_map = false; al->machine = machine; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index a53cd0b8c151..98ec56dc890b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -316,6 +316,17 @@ static struct thread *__machine__findnew_thread(struct machine *machine, rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, &machine->threads); machine->last_match = th; + + /* + * We have to initialize map_groups separately + * after rb tree is updated. + * + * The reason is that we call machine__findnew_thread + * within thread__init_map_groups to find the thread + * leader and that would screwed the rb tree. + */ + if (thread__init_map_groups(th, machine)) + return NULL; } return th; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 39cd2d0faff6..ba5f5c0c838b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -323,6 +323,7 @@ void map_groups__init(struct map_groups *mg) INIT_LIST_HEAD(&mg->removed_maps[i]); } mg->machine = NULL; + mg->refcnt = 1; } static void maps__delete(struct rb_root *maps) @@ -358,6 +359,28 @@ void map_groups__exit(struct map_groups *mg) } } +struct map_groups *map_groups__new(void) +{ + struct map_groups *mg = malloc(sizeof(*mg)); + + if (mg != NULL) + map_groups__init(mg); + + return mg; +} + +void map_groups__delete(struct map_groups *mg) +{ + map_groups__exit(mg); + free(mg); +} + +void map_groups__put(struct map_groups *mg) +{ + if (--mg->refcnt == 0) + map_groups__delete(mg); +} + void map_groups__flush(struct map_groups *mg) { int type; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index f00f058afb3b..d6445b27d672 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -59,8 +59,20 @@ struct map_groups { struct rb_root maps[MAP__NR_TYPES]; struct list_head removed_maps[MAP__NR_TYPES]; struct machine *machine; + int refcnt; }; +struct map_groups *map_groups__new(void); +void map_groups__delete(struct map_groups *mg); + +static inline struct map_groups *map_groups__get(struct map_groups *mg) +{ + ++mg->refcnt; + return mg; +} + +void map_groups__put(struct map_groups *mg); + static inline struct kmap *map__kmap(struct map *map) { return (struct kmap *)(map + 1); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 3ce0498bdae6..2fde0d5e40b5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -8,6 +8,22 @@ #include "debug.h" #include "comm.h" +int thread__init_map_groups(struct thread *thread, struct machine *machine) +{ + struct thread *leader; + pid_t pid = thread->pid_; + + if (pid == thread->tid) { + thread->mg = map_groups__new(); + } else { + leader = machine__findnew_thread(machine, pid, pid); + if (leader) + thread->mg = map_groups__get(leader->mg); + } + + return thread->mg ? 0 : -1; +} + struct thread *thread__new(pid_t pid, pid_t tid) { char *comm_str; @@ -15,7 +31,6 @@ struct thread *thread__new(pid_t pid, pid_t tid) struct thread *thread = zalloc(sizeof(*thread)); if (thread != NULL) { - map_groups__init(&thread->mg); thread->pid_ = pid; thread->tid = tid; thread->ppid = -1; @@ -45,7 +60,8 @@ void thread__delete(struct thread *thread) { struct comm *comm, *tmp; - map_groups__exit(&thread->mg); + map_groups__put(thread->mg); + thread->mg = NULL; list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) { list_del(&comm->list); comm__free(comm); @@ -111,18 +127,35 @@ int thread__comm_len(struct thread *thread) size_t thread__fprintf(struct thread *thread, FILE *fp) { return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) + - map_groups__fprintf(&thread->mg, verbose, fp); + map_groups__fprintf(thread->mg, verbose, fp); } void thread__insert_map(struct thread *thread, struct map *map) { - map_groups__fixup_overlappings(&thread->mg, map, verbose, stderr); - map_groups__insert(&thread->mg, map); + map_groups__fixup_overlappings(thread->mg, map, verbose, stderr); + map_groups__insert(thread->mg, map); +} + +static int thread__clone_map_groups(struct thread *thread, + struct thread *parent) +{ + int i; + + /* This is new thread, we share map groups for process. */ + if (thread->pid_ == parent->pid_) + return 0; + + /* But this one is new process, copy maps. */ + for (i = 0; i < MAP__NR_TYPES; ++i) + if (map_groups__clone(thread->mg, parent->mg, i) < 0) + return -ENOMEM; + + return 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) { - int i, err; + int err; if (parent->comm_set) { const char *comm = thread__comm_str(parent); @@ -134,13 +167,8 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) thread->comm_set = true; } - for (i = 0; i < MAP__NR_TYPES; ++i) - if (map_groups__clone(&thread->mg, &parent->mg, i) < 0) - return -ENOMEM; - thread->ppid = parent->tid; - - return 0; + return thread__clone_map_groups(thread, parent); } void thread__find_cpumode_addr_location(struct thread *thread, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 9b29f085aede..3c0c2724f82c 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -13,7 +13,7 @@ struct thread { struct rb_node rb_node; struct list_head node; }; - struct map_groups mg; + struct map_groups *mg; pid_t pid_; /* Not all tools update this */ pid_t tid; pid_t ppid; @@ -30,6 +30,7 @@ struct machine; struct comm; struct thread *thread__new(pid_t pid, pid_t tid); +int thread__init_map_groups(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); static inline void thread__exited(struct thread *thread) {