remarkable-linux/tools/perf/util/session.h
Frederic Weisbecker 1b3a0e9592 perf callchain: Feed callchains into a cursor
Callchains are currently fed through an array of fixed size.
As a result we iterate over each callchain three times:

- 1st to resolve symbols
- 2nd to filter out context boundaries
- 3rd for the insertion into the tree

This also involves a pair of memory allocations/deallocations
every time we insert a callchain: one for the filtered array of
addresses and one for the accompanying array of symbols.

Instead, feed the callchains through a linked list with persistent
allocations. This brings several advantages:

- The 1st and 2nd iterations are merged into one. That was possible
before, but only by allocating an array slightly larger than
necessary, because the number of context boundaries to filter out
is not known in advance.

- Far fewer allocations/deallocations. The linked list keeps
persistent empty entries for later use and can be extended at
will.

- It becomes easier for multiple sources of callchains to feed a
stacktrace together. This paves the way for CFI-based
callchains, where traditional frame-pointer based kernel
stacktraces will precede CFI-based user ones, producing an overall
callchain whose size is hard to predict. That requirement
makes the static array obsolete and a linked-list based
iterator a much more flexible fit.

Basic testing on a big perf file containing callchains (~ 176 MB)
has shown a throughput gain of about 11% with perf report.
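
The cursor introduced here is essentially an appendable linked list whose
nodes survive from one callchain to the next. Below is a minimal,
self-contained sketch of that reuse pattern; the names (cursor, cursor_node,
cursor_reset(), cursor_append()) are simplified stand-ins for illustration,
not the actual perf API:

#include <stdlib.h>

typedef unsigned long long u64;

/* One resolved frame; nodes stay allocated across callchains. */
struct cursor_node {
	u64			ip;
	struct cursor_node	*next;
};

struct cursor {
	struct cursor_node	*first;	/* head of the node list */
	struct cursor_node	**last;	/* where the next node gets linked */
	u64			nr;	/* frames used by the current callchain */
};

/* Start a new callchain: rewind, but keep every node for reuse. */
static void cursor_reset(struct cursor *cursor)
{
	cursor->nr = 0;
	cursor->last = &cursor->first;
}

/* Append one frame, reusing a spare node when one is available. */
static int cursor_append(struct cursor *cursor, u64 ip)
{
	struct cursor_node *node = *cursor->last;

	if (!node) {
		node = calloc(1, sizeof(*node));
		if (!node)
			return -1;
		*cursor->last = node;
	}

	node->ip = ip;
	cursor->nr++;
	cursor->last = &node->next;
	return 0;
}

cursor_reset() rewinds to the head without freeing anything, so a later
callchain of similar depth allocates nothing at all; callers are expected to
reset a zero-initialized cursor once before the first append.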

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1294977121-5700-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-01-22 19:56:31 -02:00

#ifndef __PERF_SESSION_H
#define __PERF_SESSION_H

#include "hist.h"
#include "event.h"
#include "header.h"
#include "symbol.h"
#include "thread.h"
#include <linux/rbtree.h>
#include "../../../include/linux/perf_event.h"

struct sample_queue;
struct ip_callchain;
struct thread;

struct ordered_samples {
	u64			last_flush;
	u64			next_flush;
	u64			max_timestamp;
	struct list_head	samples;
	struct list_head	sample_cache;
	struct list_head	to_free;
	struct sample_queue	*sample_buffer;
	struct sample_queue	*last_sample;
	int			sample_buffer_idx;
};
struct perf_session {
	struct perf_header	header;
	unsigned long		size;
	unsigned long		mmap_window;
	struct rb_root		threads;
	struct list_head	dead_threads;
	struct thread		*last_match;
	struct machine		host_machine;
	struct rb_root		machines;
	struct rb_root		hists_tree;
	/*
	 * FIXME: should point to the first entry in hists_tree and
	 *	  be a hists instance. Right now it's only 'report'
	 *	  that is using ->hists_tree while all the rest use
	 *	  ->hists.
	 */
	struct hists		hists;
	u64			sample_type;
	int			fd;
	bool			fd_pipe;
	bool			repipe;
	bool			sample_id_all;
	u16			id_hdr_size;
	int			cwdlen;
	char			*cwd;
	struct ordered_samples	ordered_samples;
	struct callchain_cursor	callchain_cursor;
	char			filename[0];
};
struct perf_event_ops;

typedef int (*event_op)(event_t *self, struct sample_data *sample,
			struct perf_session *session);
typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
typedef int (*event_op2)(event_t *self, struct perf_session *session,
			 struct perf_event_ops *ops);

struct perf_event_ops {
	event_op	sample,
			mmap,
			comm,
			fork,
			exit,
			lost,
			read,
			throttle,
			unthrottle;
	event_synth_op	attr,
			event_type,
			tracing_data,
			build_id;
	event_op2	finished_round;
	bool		ordered_samples;
	bool		ordering_requires_timestamps;
};
struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops);
void perf_session__delete(struct perf_session *self);

void perf_event_header__bswap(struct perf_event_header *self);

int __perf_session__process_events(struct perf_session *self,
				   u64 data_offset, u64 data_size, u64 size,
				   struct perf_event_ops *ops);
int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *event_ops);

int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent);

bool perf_session__has_traces(struct perf_session *self, const char *msg);

int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr);

void mem_bswap_64(void *src, int byte_size);

int perf_session__create_kernel_maps(struct perf_session *self);

void perf_session__update_sample_type(struct perf_session *self);
void perf_session__set_sample_id_all(struct perf_session *session, bool value);
void perf_session__set_sample_type(struct perf_session *session, u64 type);
void perf_session__remove_thread(struct perf_session *self, struct thread *th);
static inline
struct machine *perf_session__find_host_machine(struct perf_session *self)
{
	return &self->host_machine;
}

static inline
struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid)
{
	if (pid == HOST_KERNEL_ID)
		return &self->host_machine;
	return machines__find(&self->machines, pid);
}

static inline
struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid)
{
	if (pid == HOST_KERNEL_ID)
		return &self->host_machine;
	return machines__findnew(&self->machines, pid);
}

static inline
void perf_session__process_machines(struct perf_session *self,
				    machine__process_t process)
{
	process(&self->host_machine, self);
	return machines__process(&self->machines, process, self);
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self,
					  FILE *fp, bool with_hits);

static inline
size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp)
{
	return hists__fprintf_nr_events(&self->hists, fp);
}

#endif /* __PERF_SESSION_H */
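
For orientation, a tool built on this header fills a struct perf_event_ops
with its handlers and hands it to perf_session__new() and
perf_session__process_events(). The fragment below is only an illustrative
sketch against the declarations above; the handler body, the
sketch_ops/sketch_report() names and the include path are assumptions made
for the example, not code from the tree:

#include <fcntl.h>
#include <stdbool.h>
#include "util/session.h"

/* Hypothetical per-sample handler: consume one PERF_RECORD_SAMPLE. */
static int process_sample(event_t *event, struct sample_data *sample,
			  struct perf_session *session)
{
	return 0;
}

static struct perf_event_ops sketch_ops = {
	.sample		 = process_sample,
	.ordered_samples = true,	/* deliver samples in timestamp order */
};

/* Open a perf data file and run every event through the handlers above. */
static int sketch_report(const char *input)
{
	struct perf_session *session;
	int err;

	session = perf_session__new(input, O_RDONLY, false, false, &sketch_ops);
	if (session == NULL)
		return -1;

	err = perf_session__process_events(session, &sketch_ops);
	perf_session__delete(session);
	return err;
}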