diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 154a05cd03af..4d3f02fa223d 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -15,13 +15,18 @@ #include "event.h" struct branch_flags { - u64 mispred:1; - u64 predicted:1; - u64 in_tx:1; - u64 abort:1; - u64 cycles:16; - u64 type:4; - u64 reserved:40; + union { + u64 value; + struct { + u64 mispred:1; + u64 predicted:1; + u64 in_tx:1; + u64 abort:1; + u64 cycles:16; + u64 type:4; + u64 reserved:40; + }; + }; }; struct branch_info { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index cb33cd42ff43..8f668ee29f25 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -154,6 +154,11 @@ struct callchain_cursor_node { struct callchain_cursor_node *next; }; +struct stitch_list { + struct list_head node; + struct callchain_cursor_node cursor; +}; + struct callchain_cursor { u64 nr; struct callchain_cursor_node *first; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a659b4a1b3f2..4be7634dccf5 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1717,15 +1717,14 @@ static u64 intel_pt_lbr_flags(u64 info) union { struct branch_flags flags; u64 result; - } u = { - .flags = { - .mispred = !!(info & LBR_INFO_MISPRED), - .predicted = !(info & LBR_INFO_MISPRED), - .in_tx = !!(info & LBR_INFO_IN_TX), - .abort = !!(info & LBR_INFO_ABORT), - .cycles = info & LBR_INFO_CYCLES, - } - }; + } u; + + u.result = 0; + u.flags.mispred = !!(info & LBR_INFO_MISPRED); + u.flags.predicted = !(info & LBR_INFO_MISPRED); + u.flags.in_tx = !!(info & LBR_INFO_IN_TX); + u.flags.abort = !!(info & LBR_INFO_ABORT); + u.flags.cycles = info & LBR_INFO_CYCLES; return u.result; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 737dee723a57..5ac32cabe4e6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2348,6 +2348,119 @@ static int lbr_callchain_add_lbr_ip(struct thread 
*thread, return 0; } +static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread, + struct callchain_cursor *cursor) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct callchain_cursor_node *cnode; + struct stitch_list *stitch_node; + int err; + + list_for_each_entry(stitch_node, &lbr_stitch->lists, node) { + cnode = &stitch_node->cursor; + + err = callchain_cursor_append(cursor, cnode->ip, + &cnode->ms, + cnode->branch, + &cnode->branch_flags, + cnode->nr_loop_iter, + cnode->iter_cycles, + cnode->branch_from, + cnode->srcline); + if (err) + return err; + } + return 0; +} + +static struct stitch_list *get_stitch_node(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *stitch_node; + + if (!list_empty(&lbr_stitch->free_lists)) { + stitch_node = list_first_entry(&lbr_stitch->free_lists, + struct stitch_list, node); + list_del(&stitch_node->node); + + return stitch_node; + } + + return malloc(sizeof(struct stitch_list)); +} + +static bool has_stitched_lbr(struct thread *thread, + struct perf_sample *cur, + struct perf_sample *prev, + unsigned int max_lbr, + bool callee) +{ + struct branch_stack *cur_stack = cur->branch_stack; + struct branch_entry *cur_entries = perf_sample__branch_entries(cur); + struct branch_stack *prev_stack = prev->branch_stack; + struct branch_entry *prev_entries = perf_sample__branch_entries(prev); + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + int i, j, nr_identical_branches = 0; + struct stitch_list *stitch_node; + u64 cur_base, distance; + + if (!cur_stack || !prev_stack) + return false; + + /* Find the physical index of the base-of-stack for current sample. */ + cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1; + + distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) : + (max_lbr + prev_stack->hw_idx - cur_base); + /* Previous sample has shorter stack. Nothing can be stitched. 
*/ + if (distance + 1 > prev_stack->nr) + return false; + + /* + * Check if there are identical LBRs between two samples. + * Identical LBRs must have same from, to and flags values. Also, + * they have to be saved in the same LBR registers (same physical + * index). + * + * Starts from the base-of-stack of current sample. + */ + for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) { + if ((prev_entries[i].from != cur_entries[j].from) || + (prev_entries[i].to != cur_entries[j].to) || + (prev_entries[i].flags.value != cur_entries[j].flags.value)) + break; + nr_identical_branches++; + } + + if (!nr_identical_branches) + return false; + + /* + * Save the LBRs between the base-of-stack of previous sample + * and the base-of-stack of current sample into lbr_stitch->lists. + * These LBRs will be stitched later. + */ + for (i = prev_stack->nr - 1; i > (int)distance; i--) { + + if (!lbr_stitch->prev_lbr_cursor[i].valid) + continue; + + stitch_node = get_stitch_node(thread); + if (!stitch_node) + return false; + + memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i], + sizeof(struct callchain_cursor_node)); + + if (callee) + list_add(&stitch_node->node, &lbr_stitch->lists); + else + list_add_tail(&stitch_node->node, &lbr_stitch->lists); + } + + return true; +} + static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) { if (thread->lbr_stitch) @@ -2361,6 +2474,9 @@ static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) if (!thread->lbr_stitch->prev_lbr_cursor) goto free_lbr_stitch; + INIT_LIST_HEAD(&thread->lbr_stitch->lists); + INIT_LIST_HEAD(&thread->lbr_stitch->free_lists); + return true; free_lbr_stitch: @@ -2386,9 +2502,11 @@ static int resolve_lbr_callchain_sample(struct thread *thread, int max_stack, unsigned int max_lbr) { + bool callee = (callchain_param.order == ORDER_CALLEE); struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr), i; struct lbr_stitch
*lbr_stitch; + bool stitched_lbr = false; u64 branch_from = 0; int err; @@ -2405,10 +2523,18 @@ static int resolve_lbr_callchain_sample(struct thread *thread, (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) { lbr_stitch = thread->lbr_stitch; + stitched_lbr = has_stitched_lbr(thread, sample, + &lbr_stitch->prev_sample, + max_lbr, callee); + + if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) { + list_replace_init(&lbr_stitch->lists, + &lbr_stitch->free_lists); + } memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample)); } - if (callchain_param.order == ORDER_CALLEE) { + if (callee) { /* Add kernel ip */ err = lbr_callchain_add_kernel_ip(thread, cursor, sample, parent, root_al, branch_from, @@ -2421,7 +2547,18 @@ static int resolve_lbr_callchain_sample(struct thread *thread, if (err) goto error; + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); + if (err) + goto error; + } + } else { + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); + if (err) + goto error; + } err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, root_al, &branch_from, false); if (err) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8d0da260c84c..665e5c0618ed 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -454,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine, return dso__data_read_offset(al.map->dso, machine, offset, buf, len); } + +void thread__free_stitch_list(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *pos, *tmp; + + if (!lbr_stitch) + return; + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) { + list_del_init(&pos->node); + free(pos); + } + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) { + list_del_init(&pos->node); + free(pos); + } + + zfree(&lbr_stitch->prev_lbr_cursor); + zfree(&thread->lbr_stitch); +} diff --git a/tools/perf/util/thread.h 
b/tools/perf/util/thread.h index 8456174a52c5..b066fb30d203 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -24,6 +23,8 @@ struct thread_stack; struct unwind_libunwind_ops; struct lbr_stitch { + struct list_head lists; + struct list_head free_lists; struct perf_sample prev_sample; struct callchain_cursor_node *prev_lbr_cursor; }; @@ -154,15 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread) return false; } -static inline void thread__free_stitch_list(struct thread *thread) -{ - struct lbr_stitch *lbr_stitch = thread->lbr_stitch; - - if (!lbr_stitch) - return; - - zfree(&lbr_stitch->prev_lbr_cursor); - zfree(&thread->lbr_stitch); -} +void thread__free_stitch_list(struct thread *thread); #endif /* __PERF_THREAD_H */