From 0b80f8b32f31f80e001fb6355fd86769555c4a3c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 12:28:26 -0300 Subject: [PATCH 01/16] perf evsel: Improve tracepoint constructor setup It needs to properly set the sample_type, sample_period and the KVM related perf_event_attr fields. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 00936ad29ff2..2467eaf2968f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -117,14 +117,18 @@ struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) if (evsel != NULL) { struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, + .type = PERF_TYPE_TRACEPOINT, + .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), }; evsel->tp_format = event_format__new(sys, name); if (evsel->tp_format == NULL) goto out_free; + event_attr_init(&attr); attr.config = evsel->tp_format->id; + attr.sample_period = 1; perf_evsel__init(evsel, &attr, idx); evsel->name = evsel->tp_format->name; } From a14bb7a6fd4a3d563ca971daf462ba4dc294a7a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 12:41:14 -0300 Subject: [PATCH 02/16] perf tools: Allow handling a NULL cpu_map as meaning "all cpus" Or one with cpu_map->map[0] == -1. Reducing the boilerplate in setting up an evlist by nor requiring a cpu_map to be created at all. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rnaqn3dtnsfo1wlbbf3fhx00@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.h | 11 +++++++++++ tools/perf/util/evlist.c | 12 ++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 17b5264f6436..2f68a3b8c285 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -2,6 +2,7 @@ #define __PERF_CPUMAP_H #include +#include struct cpu_map { int nr; @@ -14,4 +15,14 @@ void cpu_map__delete(struct cpu_map *map); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); +static inline int cpu_map__nr(const struct cpu_map *map) +{ + return map ? map->nr : 1; +} + +static inline bool cpu_map__all(const struct cpu_map *map) +{ + return map ? map->map[0] == -1 : true; +} + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 892353729c7a..27f6de3aab16 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -304,7 +304,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) int cpu, thread; struct perf_evsel *pos; - for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { + for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) ioctl(FD(pos, cpu, thread), @@ -315,7 +315,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; + int nfds = cpu_map__nr(evlist->cpus) * evlist->threads->nr * evlist->nr_entries; evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); return evlist->pollfd != NULL ? 0 : -ENOMEM; } @@ -475,8 +475,8 @@ void perf_evlist__munmap(struct perf_evlist *evlist) static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) { - evlist->nr_mmaps = evlist->cpus->nr; - if (evlist->cpus->map[0] == -1) + evlist->nr_mmaps = cpu_map__nr(evlist->cpus); + if (cpu_map__all(evlist->cpus)) evlist->nr_mmaps = evlist->threads->nr; evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); return evlist->mmap != NULL ? 0 : -ENOMEM; @@ -622,11 +622,11 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, list_for_each_entry(evsel, &evlist->entries, node) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) + perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; } - if (evlist->cpus->map[0] == -1) + if (cpu_map__all(cpus)) return perf_evlist__mmap_per_thread(evlist, prot, mask); return perf_evlist__mmap_per_cpu(evlist, prot, mask); From 0807d2d8a381f4fc600ad481c3e77e5cdb624eed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 12:48:18 -0300 Subject: [PATCH 03/16] perf evsel: Know if byte swap is needed Instead of passing it around for parsing as an explicit parameter, will help with reading tracepoint fields when not using a perf session or pevent structure, i.e. for non perf.data centered workflows. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-qa67ikv2sm49cwa7dyjhhp6g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 4 ++-- tools/perf/builtin-top.c | 2 +- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 2 +- tools/perf/util/evsel.c | 13 ++++++++----- tools/perf/util/evsel.h | 3 ++- tools/perf/util/header.c | 6 +++++- tools/perf/util/python.c | 2 +- tools/perf/util/session.c | 6 ++---- 9 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 78b47a75a7c9..2fd2c031f627 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -564,7 +564,7 @@ static int test__basic_mmap(void) goto out_munmap; } - err = perf_evlist__parse_sample(evlist, event, &sample, false); + err = perf_evlist__parse_sample(evlist, event, &sample); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_munmap; @@ -781,7 +781,7 @@ static int test__PERF_RECORD(void) if (type < PERF_RECORD_MAX) nr_events[type]++; - err = perf_evlist__parse_sample(evlist, event, &sample, false); + err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { if (verbose) perf_event__fprintf(event, stderr); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5550754c05f2..e434a16bb5ac 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -823,7 +823,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) int ret; while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { - ret = perf_evlist__parse_sample(top->evlist, event, &sample, false); + ret = perf_evlist__parse_sample(top->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); continue; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 27f6de3aab16..704bd91ce05d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -884,10 +884,10 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) } int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, - struct perf_sample *sample, bool swapped) + struct perf_sample *sample) { struct perf_evsel *evsel = perf_evlist__first(evlist); - return perf_evsel__parse_sample(evsel, event, sample, swapped); + return perf_evsel__parse_sample(evsel, event, sample); } size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3f2e1e4ccdd5..008a95702bf7 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -125,7 +125,7 @@ bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, - struct perf_sample *sample, bool swapped); + struct perf_sample *sample); bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2467eaf2968f..fe9581b0323c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -767,11 +767,13 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); } -static int perf_event__parse_id_sample(const union perf_event *event, u64 type, - struct perf_sample *sample, - bool swapped) +static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel, + const union perf_event *event, + struct perf_sample *sample) { + u64 type = evsel->attr.sample_type; const u64 *array = event->sample.array; + bool swapped = evsel->needs_swap; union u64_swap u; array += ((event->header.size - @@ -832,10 +834,11 @@ static bool sample_overlap(const union perf_event *event, } int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *data, bool swapped) + struct perf_sample *data) { u64 type = evsel->attr.sample_type; u64 regs_user = evsel->attr.sample_regs_user; + bool swapped = evsel->needs_swap; const u64 *array; /* @@ -852,7 +855,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) return 0; - return perf_event__parse_id_sample(event, type, data, swapped); + return perf_evsel__parse_id_sample(evsel, event, data); } array = event->sample.array; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index bb445d1cbc7b..60d28853748e 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -69,6 +69,7 @@ struct perf_evsel { struct cpu_map *cpus; unsigned int sample_size; bool supported; + bool needs_swap; /* parse modifier helper */ int exclude_GH; struct perf_evsel *leader; @@ -205,7 +206,7 @@ static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, void hists__init(struct hists *hists); int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *sample, bool swapped); + struct perf_sample *sample); static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 6aae3290358e..7daad237dea5 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1256,8 +1256,10 @@ read_event_desc(struct perf_header *ph, int fd) if (ret != (ssize_t)sizeof(nr)) goto error; - if (ph->needs_swap) + if (ph->needs_swap) { nr = bswap_32(nr); + evsel->needs_swap = true; + } evsel->name = do_read_string(fd, ph); @@ -2626,6 +2628,8 @@ int perf_session__read_header(struct perf_session *session, int fd) if (evsel == NULL) goto out_delete_evlist; + + evsel->needs_swap = header->needs_swap; /* * Do it before so that if perf_evsel__alloc_id fails, this * entry gets purged too at perf_evlist__delete(). diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ca85444bcfbf..9181bf212fb9 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -805,7 +805,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (pyevent == NULL) return PyErr_NoMemory(); - err = perf_evlist__parse_sample(evlist, event, &pevent->sample, false); + err = perf_evlist__parse_sample(evlist, event, &pevent->sample); if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3049b0ae7003..8cdd23239c90 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -722,8 +722,7 @@ static int flush_sample_queue(struct perf_session *s, if (iter->timestamp > limit) break; - ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample, - s->header.needs_swap); + ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); if (ret) pr_err("Can't parse sample, err = %d\n", ret); else { @@ -1174,8 +1173,7 @@ static int perf_session__process_event(struct perf_session *session, /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample, - session->header.needs_swap); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); if (ret) return ret; From e6b6f6795265ec19ff35572f527bb74c07ff9399 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 13:13:04 -0300 Subject: [PATCH 04/16] perf evsel: Handle endianity in intval method We were relying on the info in pevent, but since we have it in perf_evsel, set up by the perf_session routine if read from a perf.data file or by whoever creates the evsels, use it. New 'perf test' entries will use it to parse locally generated events, in a non perf.data centered workflow. As well as use byteswap.h to get per arch optimized swap routines, like other parts of perf (header, perf_evsel__parse_sample, symbol, etc) already do. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8tjuxk09mlsfmh7macgkxsip@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fe9581b0323c..c78e42ab9770 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1108,13 +1108,43 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { struct format_field *field = perf_evsel__field(evsel, name); - u64 val; + void *ptr; + u64 value; if (!field) return 0; - val = pevent_read_number(evsel->tp_format->pevent, - sample->raw_data + field->offset, field->size); - return val; + ptr = sample->raw_data + field->offset; + switch (field->size) { + case 1: + return *(u8 *)ptr; + case 2: + value = *(u16 *)ptr; + break; + case 4: + value = *(u32 *)ptr; + break; + case 8: + value = *(u64 *)ptr; + break; + default: + return 0; + } + + if (!evsel->needs_swap) + return value; + + switch (field->size) { + case 2: + return bswap_16(value); + case 4: + return bswap_32(value); + case 8: + return bswap_64(value); + default: + return 0; + } + + return 0; } From eb2f270338461a4de659a21946739748160c0816 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 13:23:10 -0300 Subject: [PATCH 05/16] perf test: Add test to check we correctly parse and match syscall open parms It will set up a syscall open tracepoint event, generate an open with invalid flags, then check those flags were the ones reported in the tracepoint fired. For the filename we need vfs:getname, but that will go thru some more iterations as the vfs getname codebase is going thru changes lately. When that is in I'll just check that the perf_evsel__newtp constructor is not bailing out and then add it to the evlist, catch the event and check the filename against the one used in the 'open' call used to trigger the event. Cc: David Ahern Cc: Eric Paris Cc: Frederic Weisbecker Cc: Jeff Layton Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-p5w9aq0jcbb91ghzqomowm16@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-test.c | 116 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 2fd2c031f627..484f26cc0c00 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -1289,6 +1289,118 @@ static int perf_evsel__tp_sched_test(void) return ret; } +static int test__syscall_open_tp_fields(void) +{ + struct perf_record_opts opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .no_delay = true, + .freq = 1, + .mmap_pages = 256, + .raw_samples = true, + }; + const char *filename = "/etc/passwd"; + int flags = O_RDONLY | O_DIRECTORY; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + struct perf_evsel *evsel; + int err = -1, i, nr_events = 0, nr_polls = 0; + + if (evlist == NULL) { + pr_debug("%s: perf_evlist__new\n", __func__); + goto out; + } + + evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0); + if (evsel == NULL) { + pr_debug("%s: perf_evsel__newtp\n", __func__); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel); + + err = perf_evlist__create_maps(evlist, &opts.target); + if (err < 0) { + pr_debug("%s: perf_evlist__create_maps\n", __func__); + goto out_delete_evlist; + } + + perf_evsel__config(evsel, &opts, evsel); + + evlist->threads->map[0] = getpid(); + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + err = perf_evlist__mmap(evlist, UINT_MAX, false); + if (err < 0) { + pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + perf_evlist__enable(evlist); + + /* + * Generate the event: + */ + open(filename, flags); + + while (1) { + int before = nr_events; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + int tp_flags; + struct perf_sample sample; + + ++nr_events; + + if (type != PERF_RECORD_SAMPLE) + continue; + + err = perf_evsel__parse_sample(evsel, event, &sample); + if (err) { + pr_err("Can't parse sample, err = %d\n", err); + goto out_munmap; + } + + tp_flags = perf_evsel__intval(evsel, &sample, "flags"); + + if (flags != tp_flags) { + pr_debug("%s: Expected flags=%#x, got %#x\n", + __func__, flags, tp_flags); + goto out_munmap; + } + + goto out_ok; + } + } + + if (nr_events == before) + poll(evlist->pollfd, evlist->nr_fds, 10); + + if (++nr_polls > 5) { + pr_debug("%s: no events!\n", __func__); + goto out_munmap; + } + } +out_ok: + err = 0; +out_munmap: + perf_evlist__munmap(evlist); +out_delete_evlist: + perf_evlist__delete(evlist); +out: + return err; +} + static struct test { const char *desc; int (*func)(void); @@ -1339,6 +1451,10 @@ static struct test { .desc = "Check parsing of sched tracepoints fields", .func = perf_evsel__tp_sched_test, }, + { + .desc = "Generate and check syscalls:sys_enter_open event fields", + .func = test__syscall_open_tp_fields, + }, { .func = NULL, }, From 1491a63218d0fd764e7fab13aa6b82164bfc14d6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 14:43:13 -0300 Subject: [PATCH 06/16] perf evlist: Renane set_filters method to apply_filters Because that is what it really does, i.e. it applies the filters that were parsed from the command line and stashed into the evsels they refer to. We'll need the set_filter method name to actually apply a filter to all the evsels in an evlist, for instance, to ask that a syswide tracer doesn't trace itself. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/evlist.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2cb74343de3e..f14cb5fdb91f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -320,7 +320,7 @@ try_again: } } - if (perf_evlist__set_filters(evlist)) { + if (perf_evlist__apply_filters(evlist)) { error("failed to set filter with %d (%s)\n", errno, strerror(errno)); rc = -1; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e0f65fe65944..e8cd4d81b06e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -478,7 +478,7 @@ static int run_perf_stat(int argc __maybe_unused, const char **argv) counter->supported = true; } - if (perf_evlist__set_filters(evsel_list)) { + if (perf_evlist__apply_filters(evsel_list)) { error("failed to set filter with %d (%s)\n", errno, strerror(errno)); return -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 704bd91ce05d..c3ec7878985b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -666,7 +666,7 @@ void perf_evlist__delete_maps(struct perf_evlist *evlist) evlist->threads = NULL; } -int perf_evlist__set_filters(struct perf_evlist *evlist) +int perf_evlist__apply_filters(struct perf_evlist *evlist) { const struct thread_map *threads = evlist->threads; const struct cpu_map *cpus = evlist->cpus; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 008a95702bf7..25e49e02810a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -115,7 +115,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, int perf_evlist__create_maps(struct perf_evlist *evlist, struct perf_target *target); void perf_evlist__delete_maps(struct perf_evlist *evlist); -int perf_evlist__set_filters(struct perf_evlist *evlist); +int perf_evlist__apply_filters(struct perf_evlist *evlist); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); From 745cefc5fba4350243bcb63cd6f75fb47ca77725 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 15:07:39 -0300 Subject: [PATCH 07/16] perf evlist: Introduce set_filter() method To apply a filter to all the evsels in an evlist. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 43 +++++++++++++++++++++++----------------- tools/perf/util/evlist.h | 2 ++ tools/perf/util/evsel.c | 18 +++++++++++++++++ tools/perf/util/evsel.h | 3 +++ 4 files changed, 48 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c3ec7878985b..ae89686102f4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -668,30 +668,37 @@ void perf_evlist__delete_maps(struct perf_evlist *evlist) int perf_evlist__apply_filters(struct perf_evlist *evlist) { - const struct thread_map *threads = evlist->threads; - const struct cpu_map *cpus = evlist->cpus; struct perf_evsel *evsel; - char *filter; - int thread; - int cpu; - int err; - int fd; + int err = 0; + const int ncpus = cpu_map__nr(evlist->cpus), + nthreads = evlist->threads->nr; list_for_each_entry(evsel, &evlist->entries, node) { - filter = evsel->filter; - if (!filter) + if (evsel->filter == NULL) continue; - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { - fd = FD(evsel, cpu, thread); - err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); - if (err) - return err; - } - } + + err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); + if (err) + break; } - return 0; + return err; +} + +int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) +{ + struct perf_evsel *evsel; + int err = 0; + const int ncpus = cpu_map__nr(evlist->cpus), + nthreads = evlist->threads->nr; + + list_for_each_entry(evsel, &evlist->entries, node) { + err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter); + if (err) + break; + } + + return err; } bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 25e49e02810a..3f1fb66be022 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -72,6 +72,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \ perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) +int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); + struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c78e42ab9770..6022daaa7417 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -505,6 +505,24 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) return evsel->fd != NULL ? 0 : -ENOMEM; } +int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter) +{ + int cpu, thread; + + for (cpu = 0; cpu < ncpus; cpu++) { + for (thread = 0; thread < nthreads; thread++) { + int fd = FD(evsel, cpu, thread), + err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); + + if (err) + return err; + } + } + + return 0; +} + int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 60d28853748e..25043ff17fd7 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -115,6 +115,9 @@ void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); +int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter); + int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus); int perf_evsel__open_per_thread(struct perf_evsel *evsel, From e48ffe2bd49936314d367a8c6b5eaaa17d581d13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 17:11:38 -0300 Subject: [PATCH 08/16] perf evsel: The tracepoint constructor should store sys:name Not event_format->name, that doesn't contains the sys: part. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6022daaa7417..6f2a8c30413e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -122,6 +122,9 @@ struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), }; + if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) + goto out_free; + evsel->tp_format = event_format__new(sys, name); if (evsel->tp_format == NULL) goto out_free; @@ -130,12 +133,12 @@ struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) attr.config = evsel->tp_format->id; attr.sample_period = 1; perf_evsel__init(evsel, &attr, idx); - evsel->name = evsel->tp_format->name; } return evsel; out_free: + free(evsel->name); free(evsel); return NULL; } @@ -584,10 +587,8 @@ void perf_evsel__delete(struct perf_evsel *evsel) perf_evsel__exit(evsel); close_cgroup(evsel->cgrp); free(evsel->group_name); - if (evsel->tp_format && evsel->name == evsel->tp_format->name) { - evsel->name = NULL; + if (evsel->tp_format) pevent_free_format(evsel->tp_format); - } free(evsel->name); free(evsel); } From 82fe1c290cd26a3d092beb1e9755647d284a4134 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 17:13:07 -0300 Subject: [PATCH 09/16] perf tools: Use perf_evsel__newtp in the event parser Elliminating code duplication. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9v4zl7ldlp8v6azrpsu5lupk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 46 ++++++++++++---------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bf5d033ee1b4..aed38e4b9dfa 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -356,42 +356,28 @@ int parse_events_add_cache(struct list_head **list, int *idx, return add_event(list, idx, &attr, name); } -static int add_tracepoint(struct list_head **list, int *idx, +static int add_tracepoint(struct list_head **listp, int *idx, char *sys_name, char *evt_name) { - struct perf_event_attr attr; - char name[MAX_NAME_LEN]; - char evt_path[MAXPATHLEN]; - char id_buf[4]; - u64 id; - int fd; + struct perf_evsel *evsel; + struct list_head *list = *listp; - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, - sys_name, evt_name); - - fd = open(evt_path, O_RDONLY); - if (fd < 0) - return -1; - - if (read(fd, id_buf, sizeof(id_buf)) < 0) { - close(fd); - return -1; + if (!list) { + list = malloc(sizeof(*list)); + if (!list) + return -ENOMEM; + INIT_LIST_HEAD(list); } - close(fd); - id = atoll(id_buf); + evsel = perf_evsel__newtp(sys_name, evt_name, (*idx)++); + if (!evsel) { + free(list); + return -ENOMEM; + } - memset(&attr, 0, sizeof(attr)); - attr.config = id; - attr.type = PERF_TYPE_TRACEPOINT; - attr.sample_type |= PERF_SAMPLE_RAW; - attr.sample_type |= PERF_SAMPLE_TIME; - attr.sample_type |= PERF_SAMPLE_CPU; - attr.sample_type |= PERF_SAMPLE_PERIOD; - attr.sample_period = 1; - - snprintf(name, MAX_NAME_LEN, "%s:%s", sys_name, evt_name); - return add_event(list, idx, &attr, name); + list_add_tail(&evsel->node, list); + *listp = list; + return 0; } static int add_tracepoint_multi(struct list_head **list, int *idx, From 5d2074ea4fdb28b2199958f42edf914d27c926f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 20:22:00 -0300 Subject: [PATCH 10/16] perf evsel: Introduce rawptr() method Will be used for things like the args field in the raw_syscalls:sys_enter tracepoint. Implement strval with it, its basicaly strval returning void *. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- tools/perf/util/evsel.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6f2a8c30413e..e992b67e0f3a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1104,7 +1104,7 @@ struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *nam return pevent_find_field(evsel->tp_format, name); } -char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, +void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { struct format_field *field = perf_evsel__field(evsel, name); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 25043ff17fd7..2c4905020105 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -128,11 +128,18 @@ void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); struct perf_sample; -char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample, +void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, const char *name); u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name); +static inline char *perf_evsel__strval(struct perf_evsel *evsel, + struct perf_sample *sample, + const char *name) +{ + return perf_evsel__rawptr(evsel, sample, name); +} + struct format_field; struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); From 201b7334dc4e977479eb22b106ee8ec506e5e55c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 20:24:19 -0300 Subject: [PATCH 11/16] perf evsel: Export the event_format constructor It'll be needed in the next patches, where it'll be not associated directly to an evsel. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- tools/perf/util/evsel.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e992b67e0f3a..ffdd94e9c9c3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -70,7 +70,7 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) return evsel; } -static struct event_format *event_format__new(const char *sys, const char *name) +struct event_format *event_format__new(const char *sys, const char *name) { int fd, n; char *filename; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 2c4905020105..3ead0d59c03d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -83,6 +83,9 @@ struct perf_record_opts; struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx); + +struct event_format *event_format__new(const char *sys, const char *name); + void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); void perf_evsel__exit(struct perf_evsel *evsel); From 514f1c67c2fdae7b334fdc5adee63a484781241a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Sep 2012 20:05:56 -0300 Subject: [PATCH 12/16] perf trace: New tool Initially should look loosely like the venerable 'strace' tool, but using the infrastructure in the perf tools to allow tracing extra targets: [acme@sandy linux]$ perf trace --hell Error: unknown option `hell' usage: perf trace -p, --pid trace events on existing process id --tid trace events on existing thread id --all-cpus system-wide collection from all CPUs --cpu list of cpus to monitor --no-inherit child tasks do not inherit counters --mmap-pages number of mmap data pages --uid user to profile [acme@sandy linux]$ Those should have the same semantics as when using with 'perf record'. It gets stuck sometimes, but hey, it works sometimes too! In time it should support perf.data based workloads, i.e. it should have a: -o filename Command line option that will produce a perf.data file that can then be used with 'perf trace' or any of the other perf tools (script, report, etc). It will also eventually have the set of functionalities described in the previous 'trace' prototype by Thomas Gleixner: "Announcing a new utility: 'trace'" http://lwn.net/Articles/415728/ Also planned is to have some of the features suggested in the comments of that LWN article. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-v9x3q9rv4caxtox7wtjpchq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 53 +++++ tools/perf/Makefile | 3 +- tools/perf/builtin-trace.c | 300 ++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 6 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 tools/perf/Documentation/perf-trace.txt create mode 100644 tools/perf/builtin-trace.c diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt new file mode 100644 index 000000000000..3a2ae37310a9 --- /dev/null +++ b/tools/perf/Documentation/perf-trace.txt @@ -0,0 +1,53 @@ +perf-trace(1) +============= + +NAME +---- +perf-trace - strace inspired tool + +SYNOPSIS +-------- +[verse] +'perf trace' + +DESCRIPTION +----------- +This command will show the events associated with the target, initially +syscalls, but other system events like pagefaults, task lifetime events, +scheduling events, etc. + +Initially this is a live mode only tool, but eventually will work with +perf.data files like the other tools, allowing a detached 'record' from +analysis phases. + +OPTIONS +------- + +--all-cpus:: + System-wide collection from all CPUs. + +-p:: +--pid=:: + Record events on existing process ID (comma separated list). + +--tid=:: + Record events on existing thread ID (comma separated list). + +--uid=:: + Record events in threads owned by uid. Name or number. + +--no-inherit:: + Child tasks do not inherit counters. + +--mmap-pages=:: + Number of mmap data pages. Must be a power of two. + +--cpu:: +Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +In per-thread mode with inheritance mode on (default), Events are captured only when +the thread executes on the designated CPUs. Default is to monitor all CPUs. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 251dcd7fb5ac..6958ba4f5dcb 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -102,7 +102,7 @@ ifdef PARSER_DEBUG endif CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) -EXTLIBS = -lpthread -lrt -lelf -lm +EXTLIBS = -lpthread -lrt -lelf -lm -laudit ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -442,6 +442,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o BUILTIN_OBJS += $(OUTPUT)builtin-lock.o BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-test.o +BUILTIN_OBJS += $(OUTPUT)builtin-trace.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c new file mode 100644 index 000000000000..5fa1820cc969 --- /dev/null +++ b/tools/perf/builtin-trace.c @@ -0,0 +1,300 @@ +#include "builtin.h" +#include "util/evlist.h" +#include "util/parse-options.h" +#include "util/thread_map.h" +#include "event-parse.h" + +#include +#include + +static struct syscall_fmt { + const char *name; + bool errmsg; + bool timeout; +} syscall_fmts[] = { + { .name = "futex", .errmsg = true, }, + { .name = "poll", .errmsg = true, .timeout = true, }, + { .name = "ppoll", .errmsg = true, .timeout = true, }, + { .name = "read", .errmsg = true, }, + { .name = "recvfrom", .errmsg = true, }, + { .name = "select", .errmsg = true, .timeout = true, }, +}; + +static int syscall_fmt__cmp(const void *name, const void *fmtp) +{ + const struct syscall_fmt *fmt = fmtp; + return strcmp(name, fmt->name); +} + +static struct syscall_fmt *syscall_fmt__find(const char *name) +{ + const int nmemb = ARRAY_SIZE(syscall_fmts); + return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); +} + +struct syscall { + struct event_format *tp_format; + const char *name; + struct syscall_fmt *fmt; +}; + +struct trace { + int audit_machine; + struct { + int max; + struct syscall *table; + } syscalls; + struct perf_record_opts opts; +}; + +static int trace__read_syscall_info(struct trace *trace, int id) +{ + char tp_name[128]; + struct syscall *sc; + + if (id > trace->syscalls.max) { + struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); + + if (nsyscalls == NULL) + return -1; + + if (trace->syscalls.max != -1) { + memset(nsyscalls + trace->syscalls.max + 1, 0, + (id - trace->syscalls.max) * sizeof(*sc)); + } else { + memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); + } + + trace->syscalls.table = nsyscalls; + trace->syscalls.max = id; + } + + sc = trace->syscalls.table + id; + sc->name = audit_syscall_to_name(id, trace->audit_machine); + if (sc->name == NULL) + return -1; + + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); + + sc->tp_format = event_format__new("syscalls", tp_name); + sc->fmt = syscall_fmt__find(sc->name); + + return sc->tp_format != NULL ? 0 : -1; +} + +static size_t syscall__fprintf_args(struct syscall *sc, unsigned long *args, FILE *fp) +{ + int i = 0; + size_t printed = 0; + + if (sc->tp_format != NULL) { + struct format_field *field; + + for (field = sc->tp_format->format.fields->next; field; field = field->next) { + printed += fprintf(fp, "%s%s: %ld", printed ? ", " : "", + field->name, args[i++]); + } + } else { + while (i < 6) { + printed += fprintf(fp, "%sarg%d: %ld", printed ? ", " : "", i, args[i]); + ++i; + } + } + + return printed; +} + +static int trace__run(struct trace *trace) +{ + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + struct perf_evsel *evsel, *evsel_enter, *evsel_exit; + int err = -1, i, nr_events = 0, before; + + if (evlist == NULL) { + printf("Not enough memory to run!\n"); + goto out; + } + + evsel_enter = perf_evsel__newtp("raw_syscalls", "sys_enter", 0); + if (evsel_enter == NULL) { + printf("Couldn't read the raw_syscalls:sys_enter tracepoint information!\n"); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel_enter); + + evsel_exit = perf_evsel__newtp("raw_syscalls", "sys_exit", 1); + if (evsel_exit == NULL) { + printf("Couldn't read the raw_syscalls:sys_exit tracepoint information!\n"); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel_exit); + + err = perf_evlist__create_maps(evlist, &trace->opts.target); + if (err < 0) { + printf("Problems parsing the target to trace, check your options!\n"); + goto out_delete_evlist; + } + + perf_evlist__config_attrs(evlist, &trace->opts); + + err = perf_evlist__open(evlist); + if (err < 0) { + printf("Couldn't create the events: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + err = perf_evlist__mmap(evlist, UINT_MAX, false); + if (err < 0) { + printf("Couldn't mmap the events: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + perf_evlist__enable(evlist); +again: + before = nr_events; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + struct syscall *sc; + struct perf_sample sample; + int id; + + ++nr_events; + + switch (type) { + case PERF_RECORD_SAMPLE: + break; + case PERF_RECORD_LOST: + printf("LOST %" PRIu64 " events!\n", event->lost.lost); + continue; + default: + printf("Unexpected %s event, skipping...\n", + perf_event__name(type)); + continue; + } + + err = perf_evlist__parse_sample(evlist, event, &sample); + if (err) { + printf("Can't parse sample, err = %d, skipping...\n", err); + continue; + } + + evsel = perf_evlist__id2evsel(evlist, sample.id); + if (evsel == NULL) { + printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); + continue; + } + + id = perf_evsel__intval(evsel, &sample, "id"); + if (id < 0) { + printf("Invalid syscall %d id, skipping...\n", id); + continue; + } + + if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && + trace__read_syscall_info(trace, id)) + continue; + + if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) + continue; + + sc = &trace->syscalls.table[id]; + + if (evlist->threads->map[0] == -1 || evlist->threads->nr > 1) + printf("%d ", sample.tid); + + if (evsel == evsel_enter) { + void *args = perf_evsel__rawptr(evsel, &sample, "args"); + + printf("%s(", sc->name); + syscall__fprintf_args(sc, args, stdout); + } else if (evsel == evsel_exit) { + int ret = perf_evsel__intval(evsel, &sample, "ret"); + + if (ret < 0 && sc->fmt && sc->fmt->errmsg) { + char bf[256]; + const char *emsg = strerror_r(-ret, bf, sizeof(bf)), + *e = audit_errno_to_name(-ret); + + printf(") = -1 %s %s", e, emsg); + } else if (ret == 0 && sc->fmt && sc->fmt->timeout) + printf(") = 0 Timeout"); + else + printf(") = %d", ret); + + putchar('\n'); + } + } + } + + if (nr_events == before) + poll(evlist->pollfd, evlist->nr_fds, -1); + + goto again; + +out_delete_evlist: + perf_evlist__delete(evlist); +out: + return err; +} + +int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) +{ + const char * const trace_usage[] = { + "perf trace []", + NULL + }; + struct trace trace = { + .audit_machine = audit_detect_machine(), + .syscalls = { + . max = -1, + }, + .opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .no_delay = true, + .mmap_pages = 1024, + }, + }; + const struct option trace_options[] = { + OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", + "trace events on existing process id"), + OPT_STRING(0, "tid", &trace.opts.target.tid, "tid", + "trace events on existing thread id"), + OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu", + "list of cpus to monitor"), + OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, + "child tasks do not inherit counters"), + OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages, + "number of mmap data pages"), + OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user", + "user to profile"), + OPT_END() + }; + int err; + + argc = parse_options(argc, argv, trace_options, trace_usage, 0); + if (argc) + usage_with_options(trace_usage, trace_options); + + err = perf_target__parse_uid(&trace.opts.target); + if (err) { + char bf[BUFSIZ]; + perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); + printf("%s", bf); + return err; + } + + return trace__run(&trace); +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 3ea74ed1b26b..08143bd854c7 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -34,6 +34,7 @@ extern int cmd_kmem(int argc, const char **argv, const char *prefix); extern int cmd_lock(int argc, const char **argv, const char *prefix); extern int cmd_kvm(int argc, const char **argv, const char *prefix); extern int cmd_test(int argc, const char **argv, const char *prefix); +extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int find_scripts(char **scripts_array, char **scripts_path_array); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 0303ec692274..3e86bbd8c2d5 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -17,6 +17,7 @@ perf-report mainporcelain common perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common +perf-trace mainporcelain common perf-script mainporcelain common perf-probe mainporcelain full perf-kmem mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index fb8578cfa03c..3fb052c9a27f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,6 +55,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, + { "trace", cmd_trace, 0 }, { "inject", cmd_inject, 0 }, }; From 9ec60972a38011ad8a5676f4cd5e51ac508c36b6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 26 Sep 2012 16:47:28 +0900 Subject: [PATCH 13/16] perf hists: Add missing period_* fields when collapsing a hist entry So that the perf report won't lost the cpu utilization information. For example, if there're two process that have same name. $ perf report --stdio --showcpuutilization -s pid [SNIP] # Overhead sys us Command: Pid # ........ ........ ........ ............. # 55.12% 0.01% 55.10% noploop:28781 44.88% 0.06% 44.83% noploop:28782 Before: $ perf report --stdio --showcpuutilization -s comm [SNIP] # Overhead sys us # ........ ........ ........ # 100.00% 0.06% 44.83% After: $ perf report --stdio --showcpuutilization -s comm [SNIP] # Overhead sys us # ........ ........ ........ # 100.00% 0.07% 99.93% Signed-off-by: Namhyung Kim Cc: Arun Sharma Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1348645663-25303-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6ec5398de89d..236bc9d98ff2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -410,8 +410,13 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, cmp = hist_entry__collapse(iter, he); if (!cmp) { - iter->period += he->period; - iter->nr_events += he->nr_events; + iter->period += he->period; + iter->period_sys += he->period_sys; + iter->period_us += he->period_us; + iter->period_guest_sys += he->period_guest_sys; + iter->period_guest_us += he->period_guest_us; + iter->nr_events += he->nr_events; + if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); callchain_merge(&callchain_cursor, From 4d29089c2b70ffeb61656ffd1b9c9c52602ddd44 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 27 Sep 2012 20:23:38 +0900 Subject: [PATCH 14/16] perf tools: Check libaudit availability for perf-trace builtin The newly added trace command requires an external audit library. However it can cause a build error because it's not checked whether the libaudit is installed on system: CC builtin-trace.o builtin-trace.c:7:22: fatal error: libaudit.h: No such file or directory compilation terminated. make: *** [builtin-trace.o] Error 1 Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1348745018-21744-1-git-send-email-namhyung@kernel.org [ committer note: Added ", disables 'trace tool' to the feature warning msg ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 16 ++++++++++++++-- tools/perf/config/feature-tests.mak | 11 +++++++++++ tools/perf/perf.c | 2 ++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 6958ba4f5dcb..e5e71e7d95a0 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -102,7 +102,7 @@ ifdef PARSER_DEBUG endif CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) -EXTLIBS = -lpthread -lrt -lelf -lm -laudit +EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -442,7 +442,6 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o BUILTIN_OBJS += $(OUTPUT)builtin-lock.o BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-test.o -BUILTIN_OBJS += $(OUTPUT)builtin-trace.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT) @@ -560,6 +559,19 @@ else LIB_OBJS += $(OUTPUT)util/unwind.o endif +ifdef NO_LIBAUDIT + BASIC_CFLAGS += -DNO_LIBAUDIT_SUPPORT +else + FLAGS_LIBAUDIT = $(ALL_CFLAGS) $(ALL_LDFLAGS) -laudit + ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT)),y) + msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); + BASIC_CFLAGS += -DNO_LIBAUDIT_SUPPORT + else + BUILTIN_OBJS += $(OUTPUT)builtin-trace.o + EXTLIBS += -laudit + endif +endif + ifdef NO_NEWT BASIC_CFLAGS += -DNO_NEWT_SUPPORT else diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 116690a669d2..4add41bb0c7e 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -193,3 +193,14 @@ int main(void) } endef endif + +ifndef NO_LIBAUDIT +define SOURCE_LIBAUDIT +#include + +int main(void) +{ + return audit_open(); +} +endef +endif \ No newline at end of file diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3fb052c9a27f..fc2f770e3027 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,7 +55,9 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, +#ifndef NO_LIBAUDIT_SUPPORT { "trace", cmd_trace, 0 }, +#endif { "inject", cmd_inject, 0 }, }; From bb2d17a014914b628df42b0c76c85fa8a8c57e79 Mon Sep 17 00:00:00 2001 From: Hyeoncheol Lee Date: Thu, 27 Sep 2012 11:36:39 +0900 Subject: [PATCH 15/16] perf probe: Print an enum type variable in "enum variable-name" format when showing accessible variables When showing accessible variables, an enum type variable was printed in "variable-name" format. Change this format into "enum variable-name". Signed-off-by: Hyeoncheol Lee Acked-by: Masami Hiramatsu Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/1348713399-4541-1-git-send-email-hyc.lee@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index ee51e9b4dc09..3e5f5430a28a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -804,6 +804,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) tmp = "union "; else if (tag == DW_TAG_structure_type) tmp = "struct "; + else if (tag == DW_TAG_enumeration_type) + tmp = "enum "; /* Write a base name */ ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type)); return (ret >= len) ? -E2BIG : ret; From aec1930b0f6f281a0ca038cd03aceace3fe0bb61 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Sep 2012 13:16:00 -0300 Subject: [PATCH 16/16] perf trace: Add aliases for some syscalls What we get from audit_syscall_to_name isn't what we find in the syscalls: tracepoint events, so add the alias that allows the tool to find prctl, fstat, fstatat and stat. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3m9su7jhwnxvepnr3ne1du5k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5fa1820cc969..8f113dab8bf1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -9,15 +9,20 @@ static struct syscall_fmt { const char *name; + const char *alias; bool errmsg; bool timeout; } syscall_fmts[] = { - { .name = "futex", .errmsg = true, }, - { .name = "poll", .errmsg = true, .timeout = true, }, - { .name = "ppoll", .errmsg = true, .timeout = true, }, - { .name = "read", .errmsg = true, }, - { .name = "recvfrom", .errmsg = true, }, - { .name = "select", .errmsg = true, .timeout = true, }, + { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, + { .name = "fstat", .errmsg = true, .alias = "newfstat", }, + { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, + { .name = "futex", .errmsg = true, }, + { .name = "poll", .errmsg = true, .timeout = true, }, + { .name = "ppoll", .errmsg = true, .timeout = true, }, + { .name = "read", .errmsg = true, }, + { .name = "recvfrom", .errmsg = true, }, + { .name = "select", .errmsg = true, .timeout = true, }, + { .name = "stat", .errmsg = true, .alias = "newstat", }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -74,11 +79,16 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (sc->name == NULL) return -1; - snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); - - sc->tp_format = event_format__new("syscalls", tp_name); sc->fmt = syscall_fmt__find(sc->name); + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); + sc->tp_format = event_format__new("syscalls", tp_name); + + if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); + sc->tp_format = event_format__new("syscalls", tp_name); + } + return sc->tp_format != NULL ? 0 : -1; }