1
0
Fork 0

perf/core improvements and fixes:

. Add libdw DWARF post unwind support for ARM (Jean Pihet)
 
 . Consolidate types.h for ARM and ARM64 (Jean Pihet)
 
 . Fix possible null pointer dereference in session.c (Masanari Iida)
 
 . Cleanup, remove unused variables in map_switch_event() (Dongsheng Yang)
 
 . Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)
 
 . Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)
 
 Signed-off-by: Jiri Olsa <jolsa@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJTefKDAAoJEPZqUSBWB3s9KhsP/A+vkQ2ClJsCTK9ADSRA1NSx
 wnsXjE4UzgjQeaqSz+6tm/2BbckHhkzt2hcrQ690Gm0cqvY+WJ8FNxxiEmFXDgFG
 TCISeYjqpao7V1XgQX+deyIJSYD3TP/EzTUbpoJWZXIImKGs4mCP/D5C+5SNnXm2
 NREvZeHErqcrRiha6Qa9AwdoVG/uQAQKaZeEQKIMIhH7aiqgRTbGrNbobcZ5R9bT
 dZCE1Zq3ns5/Vco7HAsu0Ij6SvmETNb2OPdGrBb7zr2tNRXFKVyZAGfG+rN8G915
 Dt5fioE3s0kQMYpq8PHDcbkBH1HNbfz+QujJp7OKThC0zl6lH8Tyx8Oj2L8C7UyI
 J1vL87HLjxRvTb1EgTw7vR+n2te4vNgTd1TdXjAQyxhTU0V2AnhI+8ZQQUoKPvOX
 exQlt9ftL8IEC4EyKCyx6B5dRNIK7RVLFPKevf7a39QVEtqTbutMqu9u0I/+1PxZ
 s/v3sgLKKzVLjo3Bfsne0ZMJ+a9eJ8vz2vi63UCw44QaFXCl1VmMvoZaJxOMGnvN
 Mjlg5Vlgl0tSGToJkq0nF7mlb9iKrAYsUUWO9ND7Ya4B8C79u482oJDXI0YqH76g
 rCQlPwLbAcJF17+9z9kol2RvJolFgQnnZ52ypSYErJoKSMTdVhZ0HaaAsO4R6uKz
 RoHcyunF7ZNAmeOnzOg5
 =l453
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf into perf/core

Pull perf/core improvements and fixes from Jiri Olsa:

  * Add libdw DWARF post unwind support for ARM (Jean Pihet)

  * Consolidate types.h for ARM and ARM64 (Jean Pihet)

  * Fix possible null pointer dereference in session.c (Masanari Iida)

  * Cleanup, remove unused variables in map_switch_event() (Dongsheng Yang)

  * Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)

  * Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
hifive-unleashed-5.1
Ingo Molnar 2014-05-20 08:36:09 +02:00
commit 6480c56130
13 changed files with 187 additions and 33 deletions

View File

@ -411,7 +411,7 @@ LIB_OBJS += $(OUTPUT)tests/code-reading.o
LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
ifndef NO_DWARF_UNWIND
ifeq ($(ARCH),x86)
ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
endif
endif

View File

@ -5,3 +5,10 @@ endif
ifndef NO_LIBUNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
endif
ifndef NO_LIBDW_DWARF_UNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
endif
ifndef NO_DWARF_UNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
endif

View File

@ -2,10 +2,15 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include "../../util/types.h"
#include <linux/types.h>
#include <asm/perf_regs.h>
void perf_regs_load(u64 *regs);
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_ARM_MAX
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP

View File

@ -0,0 +1,60 @@
#include <string.h>
#include "perf_regs.h"
#include "thread.h"
#include "map.h"
#include "event.h"
#include "tests/tests.h"
#define STACK_SIZE 8192
static int sample_ustack(struct perf_sample *sample,
struct thread *thread, u64 *regs)
{
struct stack_dump *stack = &sample->user_stack;
struct map *map;
unsigned long sp;
u64 stack_size, *buf;
buf = malloc(STACK_SIZE);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
sp = (unsigned long) regs[PERF_REG_ARM_SP];
map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}
stack_size = map->end - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
memcpy(buf, (void *) sp, stack_size);
stack->data = (char *) buf;
stack->size = stack_size;
return 0;
}
int test__arch_unwind_sample(struct perf_sample *sample,
struct thread *thread)
{
struct regs_dump *regs = &sample->user_regs;
u64 *buf;
buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
perf_regs_load(buf);
regs->abi = PERF_SAMPLE_REGS_ABI;
regs->regs = buf;
regs->mask = PERF_REGS_MASK;
return sample_ustack(sample, thread, buf);
}

View File

@ -0,0 +1,58 @@
#include <linux/linkage.h>
#define R0 0x00
#define R1 0x08
#define R2 0x10
#define R3 0x18
#define R4 0x20
#define R5 0x28
#define R6 0x30
#define R7 0x38
#define R8 0x40
#define R9 0x48
#define SL 0x50
#define FP 0x58
#define IP 0x60
#define SP 0x68
#define LR 0x70
#define PC 0x78
/*
* Implementation of void perf_regs_load(u64 *regs);
*
* This functions fills in the 'regs' buffer from the actual registers values,
* in the way the perf built-in unwinding test expects them:
* - the PC at the time at the call to this function. Since this function
* is called using a bl instruction, the PC value is taken from LR.
* The built-in unwinding test then unwinds the call stack from the dwarf
* information in unwind__get_entries.
*
* Notes:
* - the 8 bytes stride in the registers offsets comes from the fact
* that the registers are stored in an u64 array (u64 *regs),
* - the regs buffer needs to be zeroed before the call to this function,
* in this case using a calloc in dwarf-unwind.c.
*/
.text
.type perf_regs_load,%function
ENTRY(perf_regs_load)
str r0, [r0, #R0]
str r1, [r0, #R1]
str r2, [r0, #R2]
str r3, [r0, #R3]
str r4, [r0, #R4]
str r5, [r0, #R5]
str r6, [r0, #R6]
str r7, [r0, #R7]
str r8, [r0, #R8]
str r9, [r0, #R9]
str sl, [r0, #SL]
str fp, [r0, #FP]
str ip, [r0, #IP]
str sp, [r0, #SP]
str lr, [r0, #LR]
str lr, [r0, #PC] // store pc as lr in order to skip the call
// to this function
mov pc, lr
ENDPROC(perf_regs_load)

View File

@ -0,0 +1,36 @@
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
val; \
})
dwarf_regs[0] = REG(R0);
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(FP);
dwarf_regs[12] = REG(IP);
dwarf_regs[13] = REG(SP);
dwarf_regs[14] = REG(LR);
dwarf_regs[15] = REG(PC);
return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
dwarf_regs);
}

View File

@ -2,7 +2,7 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include "../../util/types.h"
#include <linux/types.h>
#include <asm/perf_regs.h>
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)

View File

@ -149,7 +149,6 @@ struct perf_sched {
unsigned long nr_runs;
unsigned long nr_timestamps;
unsigned long nr_unordered_timestamps;
unsigned long nr_state_machine_bugs;
unsigned long nr_context_switch_bugs;
unsigned long nr_events;
unsigned long nr_lost_chunks;
@ -1007,17 +1006,12 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
const u32 pid = perf_evsel__intval(evsel, sample, "pid"),
success = perf_evsel__intval(evsel, sample, "success");
const u32 pid = perf_evsel__intval(evsel, sample, "pid");
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *wakee;
u64 timestamp = sample->time;
/* Note for later, it may be interesting to observe the failing cases */
if (!success)
return 0;
wakee = machine__findnew_thread(machine, 0, pid);
atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
if (!atoms) {
@ -1037,12 +1031,18 @@ static int latency_wakeup_event(struct perf_sched *sched,
atom = list_entry(atoms->work_list.prev, struct work_atom, list);
/*
* As we do not guarantee the wakeup event happens when
* task is out of run queue, also may happen when task is
* on run queue and wakeup only change ->state to TASK_RUNNING,
* then we should not set the ->wake_up_time when wake up a
* task which is on run queue.
*
* You WILL be missing events if you've recorded only
* one CPU, or are only looking at only one, so don't
* make useless noise.
* skip in this case.
*/
if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
sched->nr_state_machine_bugs++;
return 0;
sched->nr_timestamps++;
if (atom->sched_out_time > timestamp) {
@ -1266,9 +1266,8 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
next_pid = perf_evsel__intval(evsel, sample, "next_pid");
struct thread *sched_out __maybe_unused, *sched_in;
const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
struct thread *sched_in;
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
@ -1291,7 +1290,6 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
return -1;
}
sched_out = machine__findnew_thread(machine, 0, prev_pid);
sched_in = machine__findnew_thread(machine, 0, next_pid);
sched->curr_thread[this_cpu] = sched_in;
@ -1501,14 +1499,6 @@ static void print_bad_events(struct perf_sched *sched)
(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
}
if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
(double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
sched->nr_state_machine_bugs, sched->nr_timestamps);
if (sched->nr_lost_events)
printf(" (due to lost events?)");
printf("\n");
}
if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,

View File

@ -40,11 +40,11 @@ ifeq ($(ARCH),arm64)
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
# So far there's only x86 libdw unwind support merged in perf.
# So far there's only x86 and arm libdw unwind support merged in perf.
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
ifneq ($(ARCH),x86)
ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
NO_LIBDW_DWARF_UNWIND := 1
endif

View File

@ -115,7 +115,7 @@ static struct test {
.desc = "Test parsing with no sample_id_all bit set",
.func = test__parse_no_sample_id_all,
},
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "Test dwarf unwind",

View File

@ -74,9 +74,6 @@ int test__perf_evsel__tp_sched_test(void)
if (perf_evsel__test_field(evsel, "prio", 4, true))
ret = -1;
if (perf_evsel__test_field(evsel, "success", 4, true))
ret = -1;
if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
ret = -1;

View File

@ -45,7 +45,7 @@ int test__hists_filter(void);
int test__mmap_thread_lookup(void);
int test__thread_mg_share(void);
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
struct perf_sample;

View File

@ -1625,13 +1625,14 @@ out_delete_map:
void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
bool full)
{
int fd = perf_data_file__fd(session->file);
struct stat st;
int ret;
int fd, ret;
if (session == NULL || fp == NULL)
return;
fd = perf_data_file__fd(session->file);
ret = fstat(fd, &st);
if (ret == -1)
return;