1
0
Fork 0
remarkable-linux/tools/perf/util/session.h

144 lines
3.5 KiB
C
Raw Normal View History

#ifndef __PERF_SESSION_H
#define __PERF_SESSION_H
#include "event.h"
#include "header.h"
#include "symbol.h"
#include "thread.h"
#include <linux/rbtree.h>
#include "../../../include/linux/perf_event.h"
perf: Generalize perf lock's sample event reordering to the session layer The sample events recorded by perf record are not time ordered because we have one buffer per cpu for each event (even demultiplexed per task/per cpu for task bound events). But when we read trace events we want them to be ordered by time because many state machines are involved. There are currently two ways perf tools deal with that: - use -M to multiplex every buffers (perf sched, perf kmem) But this creates a lot of contention in SMP machines on record time. - use a post-processing time reordering (perf timechart, perf lock) The reordering used by timechart is simple but doesn't scale well with huge flow of events, in terms of performance and memory use (unusable with perf lock for example). Perf lock has its own samples reordering that flushes its memory use in a regular basis and that uses a sorting based on the previous event queued (a new event to be queued is close to the previous one most of the time). This patch proposes to export perf lock's samples reordering facility to the session layer that reads the events. So if a tool wants to get ordered sample events, it needs to set its struct perf_event_ops::ordered_samples to true and that's it. This prepares tracing based perf tools to get rid of the need to use buffers multiplexing (-M) or to implement their own reordering. Also lower the flush period to 2 as it's sufficient already. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> Cc: Ingo Molnar <mingo@elte.hu> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Tom Zanussi <tzanussi@gmail.com>
2010-04-23 16:04:12 -06:00
struct sample_queue;
struct ip_callchain;
struct thread;
perf: Generalize perf lock's sample event reordering to the session layer The sample events recorded by perf record are not time ordered because we have one buffer per cpu for each event (even demultiplexed per task/per cpu for task bound events). But when we read trace events we want them to be ordered by time because many state machines are involved. There are currently two ways perf tools deal with that: - use -M to multiplex every buffers (perf sched, perf kmem) But this creates a lot of contention in SMP machines on record time. - use a post-processing time reordering (perf timechart, perf lock) The reordering used by timechart is simple but doesn't scale well with huge flow of events, in terms of performance and memory use (unusable with perf lock for example). Perf lock has its own samples reordering that flushes its memory use in a regular basis and that uses a sorting based on the previous event queued (a new event to be queued is close to the previous one most of the time). This patch proposes to export perf lock's samples reordering facility to the session layer that reads the events. So if a tool wants to get ordered sample events, it needs to set its struct perf_event_ops::ordered_samples to true and that's it. This prepares tracing based perf tools to get rid of the need to use buffers multiplexing (-M) or to implement their own reordering. Also lower the flush period to 2 as it's sufficient already. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> Cc: Ingo Molnar <mingo@elte.hu> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Tom Zanussi <tzanussi@gmail.com>
2010-04-23 16:04:12 -06:00
struct ordered_samples {
u64 last_flush;
u64 flush_limit;
struct list_head samples_head;
struct sample_queue *last_inserted;
};
struct perf_session {
struct perf_header header;
unsigned long size;
unsigned long mmap_window;
struct rb_root threads;
struct thread *last_match;
struct rb_root machines;
struct events_stats events_stats;
struct rb_root stats_by_id;
unsigned long event_total[PERF_RECORD_MAX];
unsigned long unknown_events;
struct rb_root hists;
u64 sample_type;
int fd;
bool fd_pipe;
int cwdlen;
char *cwd;
perf: Generalize perf lock's sample event reordering to the session layer The sample events recorded by perf record are not time ordered because we have one buffer per cpu for each event (even demultiplexed per task/per cpu for task bound events). But when we read trace events we want them to be ordered by time because many state machines are involved. There are currently two ways perf tools deal with that: - use -M to multiplex every buffers (perf sched, perf kmem) But this creates a lot of contention in SMP machines on record time. - use a post-processing time reordering (perf timechart, perf lock) The reordering used by timechart is simple but doesn't scale well with huge flow of events, in terms of performance and memory use (unusable with perf lock for example). Perf lock has its own samples reordering that flushes its memory use in a regular basis and that uses a sorting based on the previous event queued (a new event to be queued is close to the previous one most of the time). This patch proposes to export perf lock's samples reordering facility to the session layer that reads the events. So if a tool wants to get ordered sample events, it needs to set its struct perf_event_ops::ordered_samples to true and that's it. This prepares tracing based perf tools to get rid of the need to use buffers multiplexing (-M) or to implement their own reordering. Also lower the flush period to 2 as it's sufficient already. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> Cc: Ingo Molnar <mingo@elte.hu> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Tom Zanussi <tzanussi@gmail.com>
2010-04-23 16:04:12 -06:00
struct ordered_samples ordered_samples;
char filename[0];
};
typedef int (*event_op)(event_t *self, struct perf_session *session);
struct perf_event_ops {
event_op sample,
mmap,
comm,
fork,
exit,
lost,
read,
throttle,
unthrottle,
attr,
event_type,
tracing_data,
build_id;
perf: Generalize perf lock's sample event reordering to the session layer The sample events recorded by perf record are not time ordered because we have one buffer per cpu for each event (even demultiplexed per task/per cpu for task bound events). But when we read trace events we want them to be ordered by time because many state machines are involved. There are currently two ways perf tools deal with that: - use -M to multiplex every buffers (perf sched, perf kmem) But this creates a lot of contention in SMP machines on record time. - use a post-processing time reordering (perf timechart, perf lock) The reordering used by timechart is simple but doesn't scale well with huge flow of events, in terms of performance and memory use (unusable with perf lock for example). Perf lock has its own samples reordering that flushes its memory use in a regular basis and that uses a sorting based on the previous event queued (a new event to be queued is close to the previous one most of the time). This patch proposes to export perf lock's samples reordering facility to the session layer that reads the events. So if a tool wants to get ordered sample events, it needs to set its struct perf_event_ops::ordered_samples to true and that's it. This prepares tracing based perf tools to get rid of the need to use buffers multiplexing (-M) or to implement their own reordering. Also lower the flush period to 2 as it's sufficient already. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> Cc: Ingo Molnar <mingo@elte.hu> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Tom Zanussi <tzanussi@gmail.com>
2010-04-23 16:04:12 -06:00
bool ordered_samples;
};
struct perf_session *perf_session__new(const char *filename, int mode, bool force);
void perf_session__delete(struct perf_session *self);
perf tools: Cross platform perf.data analysis support There are still some problems related to loading vmlinux files, but those are unrelated to the feature implemented in this patch, so will get fixed in the next patches, but here are some results: 1. collect perf.data file on a Fedora 12 machine, x86_64, 64-bit userland 2. transfer it to a Debian Testing machine, PARISC64, 32-bit userland acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | head -5 74f9930ee94475b6b3238caf3725a50d59cb994b [kernel.kallsyms] 55fdd56670453ea66c011158c4b9d30179c1d049 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/ipt_MASQUERADE.ko 41adff63c730890480980d5d8ba513f1c216a858 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/iptable_nat.ko 90a33def1077bb8e97b8a78546dc96c2de62df46 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/nf_nat.ko 984c7bea90ce1376d5c8e7ef43a781801286e62d /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/tun.ko acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | tail -5 22492f3753c6a67de5c7ccbd6b863390c92c0723 /usr/lib64/libXt.so.6.0.0 353802bb7e1b895ba43507cc678f951e778e4c6f /usr/lib64/libMagickCore.so.2.0.0 d10c2897558595efe7be8b0584cf7e6398bc776c /usr/lib64/libfprint.so.0.0.0 a83ecfb519a788774a84d5ddde633c9ba56c03ab /home/acme/bin/perf d3ca765a8ecf257d263801d7ad8c49c189082317 /usr/lib64/libdwarf.so.0.0 acme@parisc:~/git/linux-2.6-tip$ acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm The file [kernel.kallsyms] cannot be used, trying to use /proc/kallsyms... ^^^^ The problem related to vmlinux handling, it shouldn't be trying this ^^^^ rather alien /proc/kallsyms at all... /lib64/libpthread-2.10.2.so with build id 5c68f7afeb33309c78037e374b0deee84dd441f6 not found, continuing without symbols /lib64/libc-2.10.2.so with build id eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 not found, continuing without symbols /home/acme/bin/perf with build id a83ecfb519a788774a84d5ddde633c9ba56c03ab not found, continuing without symbols /usr/sbin/openvpn with build id f2037a091ef36b591187a858d75e203690ea9409 not found, continuing without symbols Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko, continuing without symbols Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwlcore.ko, continuing without symbols <SNIP more complaints about not finding the right build-ids, those will have to wait for 'perf archive' or plain copying what was collected by 'perf record' on the x86_64, source machine, see further below for an example of this > # Samples: 293085637 # # Overhead Command # ........ ............... # 61.70% find 23.50% perf 5.86% swapper 3.12% sshd 2.39% init 0.87% bash 0.86% sleep 0.59% dbus-daemon 0.25% hald 0.24% NetworkManager 0.19% hald-addon-rfki 0.15% openvpn 0.07% phy0 0.07% events/0 0.05% iwl3945 0.05% events/1 0.03% kondemand/0 acme@parisc:~/git/linux-2.6-tip$ Which matches what we get when running the same command for the same perf.data file on the F12, x86_64, source machine: [root@doppio linux-2.6-tip]# perf report --sort comm # Samples: 293085637 # # Overhead Command # ........ ............... # 61.70% find 23.50% perf 5.86% swapper 3.12% sshd 2.39% init 0.87% bash 0.86% sleep 0.59% dbus-daemon 0.25% hald 0.24% NetworkManager 0.19% hald-addon-rfki 0.15% openvpn 0.07% phy0 0.07% events/0 0.05% iwl3945 0.05% events/1 0.03% kondemand/0 [root@doppio linux-2.6-tip]# The other modes work as well, modulo the problem with vmlinux: acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso 2> /dev/null | head -15 # Samples: 293085637 # # Overhead Command Shared Object # ........ ............... ................................. # 35.11% find ffffffff81002b5a 18.25% perf ffffffff8102235f 16.17% find libc-2.10.2.so 9.07% find find 5.80% swapper ffffffff8102235f 3.95% perf libc-2.10.2.so 2.33% init ffffffff810091b9 1.65% sshd libcrypto.so.0.9.8k 1.35% find [e1000e] 0.68% sleep libc-2.10.2.so acme@parisc:~/git/linux-2.6-tip$ And the lack of the right buildids: acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso,symbol 2> /dev/null | head -15 # Samples: 293085637 # # Overhead Command Shared Object Symbol # ........ ............... ................................. ...... # 35.11% find ffffffff81002b5a [k] 0xffffffff81002b5a 18.25% perf ffffffff8102235f [k] 0xffffffff8102235f 16.17% find libc-2.10.2.so [.] 0x00000000045782 9.07% find find [.] 0x0000000000fb0e 5.80% swapper ffffffff8102235f [k] 0xffffffff8102235f 3.95% perf libc-2.10.2.so [.] 0x0000000007f398 2.33% init ffffffff810091b9 [k] 0xffffffff810091b9 1.65% sshd libcrypto.so.0.9.8k [.] 0x00000000105440 1.35% find [e1000e] [k] 0x00000000010948 0.68% sleep libc-2.10.2.so [.] 0x0000000011ad5b acme@parisc:~/git/linux-2.6-tip$ But if we: acme@parisc:~/git/linux-2.6-tip$ ls ~/.debug ls: cannot access /home/acme/.debug: No such file or directory acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/lib64/libc-2.10.2.so/ acme@parisc:~/git/linux-2.6-tip$ scp doppio:.debug/lib64/libc-2.10.2.so/* ~/.debug/lib64/libc-2.10.2.so/ acme@doppio's password: eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 100% 1783KB 714.7KB/s 00:02 acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/.build-id/eb acme@parisc:~/git/linux-2.6-tip$ ln -s ../../lib64/libc-2.10.2.so/eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 ~/.debug/.build-id/eb/4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 acme@parisc:~/git/linux-2.6-tip$ perf report --dsos libc-2.10.2.so 2> /dev/null # dso: libc-2.10.2.so # Samples: 64281170 # # Overhead Command Symbol # ........ ............... ...... # 14.98% perf [.] __GI_strcmp 12.30% find [.] __GI_memmove 9.25% find [.] _int_malloc 7.60% find [.] _IO_vfprintf_internal 6.10% find [.] _IO_new_file_xsputn 6.02% find [.] __GI_close 3.08% find [.] _IO_file_overflow_internal 3.08% find [.] malloc_consolidate 3.08% find [.] _int_free 3.08% find [.] __strchrnul 3.08% find [.] __getdents64 3.08% find [.] __write_nocancel 3.08% sleep [.] __GI__dl_addr 3.08% sshd [.] __libc_select 3.08% find [.] _IO_new_file_write 3.07% find [.] _IO_new_do_write 3.06% find [.] __GI___errno_location 3.05% find [.] __GI___libc_malloc 3.04% perf [.] __GI_memcpy 1.71% find [.] __fprintf_chk 1.29% bash [.] __gconv_transform_utf8_internal 0.79% dbus-daemon [.] __GI_strlen # # (For a higher level overview, try: perf report --sort comm,dso) # acme@parisc:~/git/linux-2.6-tip$ Which matches what we get on the source, F12, x86_64 machine: [root@doppio linux-2.6-tip]# perf report --dsos libc-2.10.2.so # dso: libc-2.10.2.so # Samples: 64281170 # # Overhead Command Symbol # ........ ............... ...... # 14.98% perf [.] __GI_strcmp 12.30% find [.] __GI_memmove 9.25% find [.] _int_malloc 7.60% find [.] _IO_vfprintf_internal 6.10% find [.] _IO_new_file_xsputn 6.02% find [.] __GI_close 3.08% find [.] _IO_file_overflow_internal 3.08% find [.] malloc_consolidate 3.08% find [.] _int_free 3.08% find [.] __strchrnul 3.08% find [.] __getdents64 3.08% find [.] __write_nocancel 3.08% sleep [.] __GI__dl_addr 3.08% sshd [.] __libc_select 3.08% find [.] _IO_new_file_write 3.07% find [.] _IO_new_do_write 3.06% find [.] __GI___errno_location 3.05% find [.] __GI___libc_malloc 3.04% perf [.] __GI_memcpy 1.71% find [.] __fprintf_chk 1.29% bash [.] __gconv_transform_utf8_internal 0.79% dbus-daemon [.] __GI_strlen # # (For a higher level overview, try: perf report --sort comm,dso) # [root@doppio linux-2.6-tip]# So I think this is really, really nice in that it demonstrates the portability of perf.data files and the use of build-ids accross such aliens worlds :-) There are some things to fix tho, like the bitmap on the header, but things are looking good. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1263478990-8200-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-14 07:23:10 -07:00
void perf_event_header__bswap(struct perf_event_header *self);
int __perf_session__process_events(struct perf_session *self,
u64 data_offset, u64 data_size, u64 size,
struct perf_event_ops *ops);
int perf_session__process_events(struct perf_session *self,
struct perf_event_ops *event_ops);
struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
struct thread *thread,
struct ip_callchain *chain,
struct symbol **parent);
bool perf_session__has_traces(struct perf_session *self, const char *msg);
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
perf tools: Handle relocatable kernels DSOs don't have this problem because the kernel emits a PERF_MMAP for each new executable mapping it performs on monitored threads. To fix the kernel case we simulate the same behaviour, by having 'perf record' to synthesize a PERF_MMAP for the kernel, encoded like this: [root@doppio ~]# perf record -a -f sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.344 MB perf.data (~15038 samples) ] [root@doppio ~]# perf report -D | head -10 0xd0 [0x40]: event: 1 . . ... raw event: size 64 bytes . 0000: 01 00 00 00 00 00 40 00 00 00 00 00 00 00 00 00 ......@........ . 0010: 00 00 00 81 ff ff ff ff 00 00 00 00 00 00 00 00 ............... . 0020: 00 00 00 00 00 00 00 00 5b 6b 65 72 6e 65 6c 2e ........ [kernel . 0030: 6b 61 6c 6c 73 79 6d 73 2e 5f 74 65 78 74 5d 00 kallsyms._text] . 0xd0 [0x40]: PERF_RECORD_MMAP 0/0: [0xffffffff81000000((nil)) @ (nil)]: [kernel.kallsyms._text] I.e. we identify such event as having: .pid = 0 .filename = [kernel.kallsyms.REFNAME] .start = REFNAME addr in /proc/kallsyms at 'perf record' time and use now a hardcoded value of '.text' for REFNAME. Then, later, in 'perf report', if there are any kernel hits and thus we need to resolve kernel symbols, we search for REFNAME and if its address changed, relocation happened and we thus must change the kernel mapping routines to one that uses .pgoff as the relocation to apply. This way we use the same mechanism used for the other DSOs and don't have to do a two pass in all the kernel symbols. Reported-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> Cc: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> LKML-Reference: <1262717431-1246-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-05 11:50:31 -07:00
const char *symbol_name,
u64 addr);
perf tools: Cross platform perf.data analysis support There are still some problems related to loading vmlinux files, but those are unrelated to the feature implemented in this patch, so will get fixed in the next patches, but here are some results: 1. collect perf.data file on a Fedora 12 machine, x86_64, 64-bit userland 2. transfer it to a Debian Testing machine, PARISC64, 32-bit userland acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | head -5 74f9930ee94475b6b3238caf3725a50d59cb994b [kernel.kallsyms] 55fdd56670453ea66c011158c4b9d30179c1d049 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/ipt_MASQUERADE.ko 41adff63c730890480980d5d8ba513f1c216a858 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/iptable_nat.ko 90a33def1077bb8e97b8a78546dc96c2de62df46 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/nf_nat.ko 984c7bea90ce1376d5c8e7ef43a781801286e62d /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/tun.ko acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | tail -5 22492f3753c6a67de5c7ccbd6b863390c92c0723 /usr/lib64/libXt.so.6.0.0 353802bb7e1b895ba43507cc678f951e778e4c6f /usr/lib64/libMagickCore.so.2.0.0 d10c2897558595efe7be8b0584cf7e6398bc776c /usr/lib64/libfprint.so.0.0.0 a83ecfb519a788774a84d5ddde633c9ba56c03ab /home/acme/bin/perf d3ca765a8ecf257d263801d7ad8c49c189082317 /usr/lib64/libdwarf.so.0.0 acme@parisc:~/git/linux-2.6-tip$ acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm The file [kernel.kallsyms] cannot be used, trying to use /proc/kallsyms... ^^^^ The problem related to vmlinux handling, it shouldn't be trying this ^^^^ rather alien /proc/kallsyms at all... /lib64/libpthread-2.10.2.so with build id 5c68f7afeb33309c78037e374b0deee84dd441f6 not found, continuing without symbols /lib64/libc-2.10.2.so with build id eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 not found, continuing without symbols /home/acme/bin/perf with build id a83ecfb519a788774a84d5ddde633c9ba56c03ab not found, continuing without symbols /usr/sbin/openvpn with build id f2037a091ef36b591187a858d75e203690ea9409 not found, continuing without symbols Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko, continuing without symbols Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwlcore.ko, continuing without symbols <SNIP more complaints about not finding the right build-ids, those will have to wait for 'perf archive' or plain copying what was collected by 'perf record' on the x86_64, source machine, see further below for an example of this > # Samples: 293085637 # # Overhead Command # ........ ............... # 61.70% find 23.50% perf 5.86% swapper 3.12% sshd 2.39% init 0.87% bash 0.86% sleep 0.59% dbus-daemon 0.25% hald 0.24% NetworkManager 0.19% hald-addon-rfki 0.15% openvpn 0.07% phy0 0.07% events/0 0.05% iwl3945 0.05% events/1 0.03% kondemand/0 acme@parisc:~/git/linux-2.6-tip$ Which matches what we get when running the same command for the same perf.data file on the F12, x86_64, source machine: [root@doppio linux-2.6-tip]# perf report --sort comm # Samples: 293085637 # # Overhead Command # ........ ............... # 61.70% find 23.50% perf 5.86% swapper 3.12% sshd 2.39% init 0.87% bash 0.86% sleep 0.59% dbus-daemon 0.25% hald 0.24% NetworkManager 0.19% hald-addon-rfki 0.15% openvpn 0.07% phy0 0.07% events/0 0.05% iwl3945 0.05% events/1 0.03% kondemand/0 [root@doppio linux-2.6-tip]# The other modes work as well, modulo the problem with vmlinux: acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso 2> /dev/null | head -15 # Samples: 293085637 # # Overhead Command Shared Object # ........ ............... ................................. # 35.11% find ffffffff81002b5a 18.25% perf ffffffff8102235f 16.17% find libc-2.10.2.so 9.07% find find 5.80% swapper ffffffff8102235f 3.95% perf libc-2.10.2.so 2.33% init ffffffff810091b9 1.65% sshd libcrypto.so.0.9.8k 1.35% find [e1000e] 0.68% sleep libc-2.10.2.so acme@parisc:~/git/linux-2.6-tip$ And the lack of the right buildids: acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso,symbol 2> /dev/null | head -15 # Samples: 293085637 # # Overhead Command Shared Object Symbol # ........ ............... ................................. ...... # 35.11% find ffffffff81002b5a [k] 0xffffffff81002b5a 18.25% perf ffffffff8102235f [k] 0xffffffff8102235f 16.17% find libc-2.10.2.so [.] 0x00000000045782 9.07% find find [.] 0x0000000000fb0e 5.80% swapper ffffffff8102235f [k] 0xffffffff8102235f 3.95% perf libc-2.10.2.so [.] 0x0000000007f398 2.33% init ffffffff810091b9 [k] 0xffffffff810091b9 1.65% sshd libcrypto.so.0.9.8k [.] 0x00000000105440 1.35% find [e1000e] [k] 0x00000000010948 0.68% sleep libc-2.10.2.so [.] 0x0000000011ad5b acme@parisc:~/git/linux-2.6-tip$ But if we: acme@parisc:~/git/linux-2.6-tip$ ls ~/.debug ls: cannot access /home/acme/.debug: No such file or directory acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/lib64/libc-2.10.2.so/ acme@parisc:~/git/linux-2.6-tip$ scp doppio:.debug/lib64/libc-2.10.2.so/* ~/.debug/lib64/libc-2.10.2.so/ acme@doppio's password: eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 100% 1783KB 714.7KB/s 00:02 acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/.build-id/eb acme@parisc:~/git/linux-2.6-tip$ ln -s ../../lib64/libc-2.10.2.so/eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 ~/.debug/.build-id/eb/4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 acme@parisc:~/git/linux-2.6-tip$ perf report --dsos libc-2.10.2.so 2> /dev/null # dso: libc-2.10.2.so # Samples: 64281170 # # Overhead Command Symbol # ........ ............... ...... # 14.98% perf [.] __GI_strcmp 12.30% find [.] __GI_memmove 9.25% find [.] _int_malloc 7.60% find [.] _IO_vfprintf_internal 6.10% find [.] _IO_new_file_xsputn 6.02% find [.] __GI_close 3.08% find [.] _IO_file_overflow_internal 3.08% find [.] malloc_consolidate 3.08% find [.] _int_free 3.08% find [.] __strchrnul 3.08% find [.] __getdents64 3.08% find [.] __write_nocancel 3.08% sleep [.] __GI__dl_addr 3.08% sshd [.] __libc_select 3.08% find [.] _IO_new_file_write 3.07% find [.] _IO_new_do_write 3.06% find [.] __GI___errno_location 3.05% find [.] __GI___libc_malloc 3.04% perf [.] __GI_memcpy 1.71% find [.] __fprintf_chk 1.29% bash [.] __gconv_transform_utf8_internal 0.79% dbus-daemon [.] __GI_strlen # # (For a higher level overview, try: perf report --sort comm,dso) # acme@parisc:~/git/linux-2.6-tip$ Which matches what we get on the source, F12, x86_64 machine: [root@doppio linux-2.6-tip]# perf report --dsos libc-2.10.2.so # dso: libc-2.10.2.so # Samples: 64281170 # # Overhead Command Symbol # ........ ............... ...... # 14.98% perf [.] __GI_strcmp 12.30% find [.] __GI_memmove 9.25% find [.] _int_malloc 7.60% find [.] _IO_vfprintf_internal 6.10% find [.] _IO_new_file_xsputn 6.02% find [.] __GI_close 3.08% find [.] _IO_file_overflow_internal 3.08% find [.] malloc_consolidate 3.08% find [.] _int_free 3.08% find [.] __strchrnul 3.08% find [.] __getdents64 3.08% find [.] __write_nocancel 3.08% sleep [.] __GI__dl_addr 3.08% sshd [.] __libc_select 3.08% find [.] _IO_new_file_write 3.07% find [.] _IO_new_do_write 3.06% find [.] __GI___errno_location 3.05% find [.] __GI___libc_malloc 3.04% perf [.] __GI_memcpy 1.71% find [.] __fprintf_chk 1.29% bash [.] __gconv_transform_utf8_internal 0.79% dbus-daemon [.] __GI_strlen # # (For a higher level overview, try: perf report --sort comm,dso) # [root@doppio linux-2.6-tip]# So I think this is really, really nice in that it demonstrates the portability of perf.data files and the use of build-ids accross such aliens worlds :-) There are some things to fix tho, like the bitmap on the header, but things are looking good. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1263478990-8200-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-14 07:23:10 -07:00
void mem_bswap_64(void *src, int byte_size);
int perf_session__create_kernel_maps(struct perf_session *self);
perf report: Implement initial UI using newt Newt has widespread availability and provides a rather simple API as can be seen by the size of this patch. The work needed to support it will benefit other frontends too. In this initial patch it just checks if the output is a tty, if not it falls back to the previous behaviour, also if newt-devel/libnewt-dev is not installed the previous behaviour is maintaned. Pressing enter on a symbol will annotate it, ESC in the annotation window will return to the report symbol list. More work will be done to remove the special casing in color_fprintf, stop using fmemopen/FILE in the printing of hist_entries, etc. Also the annotation doesn't need to be done via spawning "perf annotate" and then browsing its output, we can do better by calling directly the builtin-annotate.c functions, that would then be moved to tools/perf/util/annotate.c and shared with perf top, etc But lets go by baby steps, this patch already improves perf usability by allowing to quickly do annotations on symbols from the report screen and provides a first experimentation with libnewt/TUI integration of tools. Tested on RHEL5 and Fedora12 X86_64 and on Debian PARISC64 to browse a perf.data file collected on a Fedora12 x86_64 box. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Avi Kivity <avi@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1268349164-5822-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-11 16:12:44 -07:00
int do_read(int fd, void *buf, size_t size);
void perf_session__update_sample_type(struct perf_session *self);
perf report: Implement initial UI using newt Newt has widespread availability and provides a rather simple API as can be seen by the size of this patch. The work needed to support it will benefit other frontends too. In this initial patch it just checks if the output is a tty, if not it falls back to the previous behaviour, also if newt-devel/libnewt-dev is not installed the previous behaviour is maintaned. Pressing enter on a symbol will annotate it, ESC in the annotation window will return to the report symbol list. More work will be done to remove the special casing in color_fprintf, stop using fmemopen/FILE in the printing of hist_entries, etc. Also the annotation doesn't need to be done via spawning "perf annotate" and then browsing its output, we can do better by calling directly the builtin-annotate.c functions, that would then be moved to tools/perf/util/annotate.c and shared with perf top, etc But lets go by baby steps, this patch already improves perf usability by allowing to quickly do annotations on symbols from the report screen and provides a first experimentation with libnewt/TUI integration of tools. Tested on RHEL5 and Fedora12 X86_64 and on Debian PARISC64 to browse a perf.data file collected on a Fedora12 x86_64 box. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Avi Kivity <avi@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1268349164-5822-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-11 16:12:44 -07:00
#ifdef NO_NEWT_SUPPORT
static inline int perf_session__browse_hists(struct rb_root *hists __used,
u64 nr_hists __used,
perf report: Implement initial UI using newt Newt has widespread availability and provides a rather simple API as can be seen by the size of this patch. The work needed to support it will benefit other frontends too. In this initial patch it just checks if the output is a tty, if not it falls back to the previous behaviour, also if newt-devel/libnewt-dev is not installed the previous behaviour is maintaned. Pressing enter on a symbol will annotate it, ESC in the annotation window will return to the report symbol list. More work will be done to remove the special casing in color_fprintf, stop using fmemopen/FILE in the printing of hist_entries, etc. Also the annotation doesn't need to be done via spawning "perf annotate" and then browsing its output, we can do better by calling directly the builtin-annotate.c functions, that would then be moved to tools/perf/util/annotate.c and shared with perf top, etc But lets go by baby steps, this patch already improves perf usability by allowing to quickly do annotations on symbols from the report screen and provides a first experimentation with libnewt/TUI integration of tools. Tested on RHEL5 and Fedora12 X86_64 and on Debian PARISC64 to browse a perf.data file collected on a Fedora12 x86_64 box. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Avi Kivity <avi@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1268349164-5822-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-11 16:12:44 -07:00
u64 session_total __used,
const char *helpline __used,
const char *input_name __used)
{
return 0;
}
perf report: Implement initial UI using newt Newt has widespread availability and provides a rather simple API as can be seen by the size of this patch. The work needed to support it will benefit other frontends too. In this initial patch it just checks if the output is a tty, if not it falls back to the previous behaviour, also if newt-devel/libnewt-dev is not installed the previous behaviour is maintaned. Pressing enter on a symbol will annotate it, ESC in the annotation window will return to the report symbol list. More work will be done to remove the special casing in color_fprintf, stop using fmemopen/FILE in the printing of hist_entries, etc. Also the annotation doesn't need to be done via spawning "perf annotate" and then browsing its output, we can do better by calling directly the builtin-annotate.c functions, that would then be moved to tools/perf/util/annotate.c and shared with perf top, etc But lets go by baby steps, this patch already improves perf usability by allowing to quickly do annotations on symbols from the report screen and provides a first experimentation with libnewt/TUI integration of tools. Tested on RHEL5 and Fedora12 X86_64 and on Debian PARISC64 to browse a perf.data file collected on a Fedora12 x86_64 box. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Avi Kivity <avi@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1268349164-5822-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-11 16:12:44 -07:00
#else
int perf_session__browse_hists(struct rb_root *hists, u64 nr_hists,
u64 session_total, const char *helpline,
const char *input_name);
perf report: Implement initial UI using newt Newt has widespread availability and provides a rather simple API as can be seen by the size of this patch. The work needed to support it will benefit other frontends too. In this initial patch it just checks if the output is a tty, if not it falls back to the previous behaviour, also if newt-devel/libnewt-dev is not installed the previous behaviour is maintaned. Pressing enter on a symbol will annotate it, ESC in the annotation window will return to the report symbol list. More work will be done to remove the special casing in color_fprintf, stop using fmemopen/FILE in the printing of hist_entries, etc. Also the annotation doesn't need to be done via spawning "perf annotate" and then browsing its output, we can do better by calling directly the builtin-annotate.c functions, that would then be moved to tools/perf/util/annotate.c and shared with perf top, etc But lets go by baby steps, this patch already improves perf usability by allowing to quickly do annotations on symbols from the report screen and provides a first experimentation with libnewt/TUI integration of tools. Tested on RHEL5 and Fedora12 X86_64 and on Debian PARISC64 to browse a perf.data file collected on a Fedora12 x86_64 box. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Avi Kivity <avi@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1268349164-5822-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-11 16:12:44 -07:00
#endif
static inline
struct machine *perf_session__find_host_machine(struct perf_session *self)
{
return machines__find_host(&self->machines);
}
static inline
struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid)
{
return machines__find(&self->machines, pid);
}
static inline
struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid)
{
return machines__findnew(&self->machines, pid);
}
static inline
void perf_session__process_machines(struct perf_session *self,
machine__process_t process)
{
return machines__process(&self->machines, process, self);
}
static inline
size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
return machines__fprintf_dsos(&self->machines, fp);
}
static inline
size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
bool with_hits)
{
return machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}
#endif /* __PERF_SESSION_H */