From 54ebbe7ba51d97a28a9a406203d171d61858e4b9 Mon Sep 17 00:00:00 2001 From: Heinz Graalfs Date: Fri, 21 Jan 2011 10:06:54 +0000 Subject: [PATCH 01/30] oprofile: Introduce new oprofile sample add function (oprofile_add_ext_hw_sample) This patch introduces a new oprofile sample add function (oprofile_add_ext_hw_sample) that can also take task_struct as an argument, which is used by the hwsampler kernel module when copying hardware samples to OProfile buffers. Applied with following changes: * removed #include * whitespace changes * removed conditional compilation (CONFIG_HAVE_HWSAMPLER) * modified order of functions * fix missing function definition in header file Signed-off-by: Mahesh Salgaonkar Signed-off-by: Maran Pakkirisamy Signed-off-by: Heinz Graalfs Acked-by: Heiko Carstens Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 24 +++++++++++++++++------- include/linux/oprofile.h | 7 +++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 59f55441e075..b8ef8ddcc292 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -258,8 +258,10 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf, */ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, - unsigned long backtrace, int is_kernel, unsigned long event) + unsigned long backtrace, int is_kernel, unsigned long event, + struct task_struct *task) { + struct task_struct *tsk = task ? task : current; cpu_buf->sample_received++; if (pc == ESCAPE_CODE) { @@ -267,7 +269,7 @@ log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, return 0; } - if (op_add_code(cpu_buf, backtrace, is_kernel, current)) + if (op_add_code(cpu_buf, backtrace, is_kernel, tsk)) goto fail; if (op_add_sample(cpu_buf, pc, event)) @@ -292,7 +294,8 @@ static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) static inline void __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, - unsigned long event, int is_kernel) + unsigned long event, int is_kernel, + struct task_struct *task) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); unsigned long backtrace = oprofile_backtrace_depth; @@ -301,7 +304,7 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, * if log_sample() fail we can't backtrace since we lost the * source of this event */ - if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event)) + if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task)) /* failed */ return; @@ -313,10 +316,17 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, oprofile_end_trace(cpu_buf); } +void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel, + struct task_struct *task) +{ + __oprofile_add_ext_sample(pc, regs, event, is_kernel, task); +} + void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, unsigned long event, int is_kernel) { - __oprofile_add_ext_sample(pc, regs, event, is_kernel); + __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL); } void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) @@ -332,7 +342,7 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) pc = ESCAPE_CODE; /* as this causes an early return. 
*/
 	}
 
-	__oprofile_add_ext_sample(pc, regs, event, is_kernel);
+	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
 }
 
 /*
@@ -403,7 +413,7 @@ int oprofile_write_commit(struct op_entry *entry)
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
 {
 	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
-	log_sample(cpu_buf, pc, 0, is_kernel, event);
+	log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
 }
 
 void oprofile_add_trace(unsigned long pc)
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 1ca64113efe8..7f5cfd3b37dd 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -106,6 +106,13 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
 void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
 				unsigned long event, int is_kernel);
 
+/**
+ * Add a hardware sample.
+ */
+void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
+				unsigned long event, int is_kernel,
+				struct task_struct *task);
+
 /* Use this instead when the PC value is not from the regs. Doesn't
  * backtrace. */
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);

From ec6a3df1c008d9e8664e53b0363f6847c5c0dc3f Mon Sep 17 00:00:00 2001
From: Heinz Graalfs
Date: Fri, 21 Jan 2011 10:06:52 +0000
Subject: [PATCH 02/30] oprofile, s390: Add support for hardware based sampling on System z processors

This adds support for hardware based sampling on System z processors
(models z10 and up).

System z's hardware sampling is described in detail in:

  SA23-2260-01 "The Load-Program-Parameter and CPU-Measurement Facilities"

The patch introduces
- support for System z's hardware sampler in OProfile's kernel module
- functions that control all hardware sampling related operations,
  such as:
  - checking whether the hardware sampling feature is available, i.e.
    on System z models z10 and up, in LPAR mode only, and authorised
    during LPAR activation
  - allocating memory for the hardware sampling feature
  - starting/stopping hardware sampling

All functions required to start and stop hardware sampling have to be
invoked by the oprofile kernel module, as provided by the other patches
of this patch set.

In case hardware based sampling cannot be set up, standard timer based
sampling is used by OProfile.
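For orientation, the sketch below shows the call order this interface expects, roughly as the oprofile glue code added later in this series drives it. It is an illustrative sketch only, not part of the patch: the function name and the SDBT/SDB counts are made up for the example, and error handling is abbreviated.

#include <linux/errno.h>

#include "hwsampler.h"

/*
 * Illustrative sketch (not part of the patch): expected life cycle of
 * the hwsampler interface. Setup/allocate/start on entry,
 * stop/deallocate/shutdown on exit.
 */
static int hwsampler_example_session(unsigned long interval)
{
	long min, max;
	int rc;

	rc = hwsampler_setup();		/* check facility, query sampling info */
	if (rc)
		return rc;		/* caller falls back to timer mode */

	min = hwsampler_query_min_interval();
	max = hwsampler_query_max_interval();
	if (min < 0 || max < 0 ||
	    interval < (unsigned long) min || interval > (unsigned long) max) {
		rc = -EINVAL;
		goto out_shutdown;
	}

	rc = hwsampler_allocate(1, 511);	/* SDBTs and SDBs per online CPU */
	if (rc)
		goto out_shutdown;

	rc = hwsampler_start_all(interval);
	if (rc)
		goto out_deallocate;

	/* ... samples are delivered to oprofile by the per-CPU worker ... */

	hwsampler_stop_all();
out_deallocate:
	hwsampler_deallocate();
out_shutdown:
	hwsampler_shutdown();
	return rc;
}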
Applied with following changes: * enable compilation in Makefile Signed-off-by: Mahesh Salgaonkar Signed-off-by: Maran Pakkirisamy Signed-off-by: Heinz Graalfs Acked-by: Heiko Carstens Signed-off-by: Robert Richter --- arch/Kconfig | 3 + arch/s390/Kconfig | 1 + arch/s390/oprofile/Makefile | 2 +- arch/s390/oprofile/hwsampler.c | 1256 ++++++++++++++++++++++++++++++++ arch/s390/oprofile/hwsampler.h | 113 +++ 5 files changed, 1374 insertions(+), 1 deletion(-) create mode 100644 arch/s390/oprofile/hwsampler.c create mode 100644 arch/s390/oprofile/hwsampler.h diff --git a/arch/Kconfig b/arch/Kconfig index f78c2be4242b..43abf3c6da8e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,9 @@ config OPROFILE_EVENT_MULTIPLEX config HAVE_OPROFILE bool +config HAVE_HWSAMPLER + bool + config KPROBES bool "Kprobes" depends on MODULES diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ff19efdf6fef..0cf20adfbb45 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -115,6 +115,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select HAVE_HWSAMPLER config SCHED_OMIT_FRAME_POINTER def_bool y diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 537b2d840e69..d698cddcfbdd 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,4 +6,4 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o hwsampler.o diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c new file mode 100644 index 000000000000..ab3f770e35ba --- /dev/null +++ b/arch/s390/oprofile/hwsampler.c @@ -0,0 +1,1256 @@ +/** + * arch/s390/oprofile/hwsampler.c + * + * Copyright IBM Corp. 
2010 + * Author: Heinz Graalfs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hwsampler.h" + +#define MAX_NUM_SDB 511 +#define MIN_NUM_SDB 1 + +#define ALERT_REQ_MASK 0x4000000000000000ul +#define BUFFER_FULL_MASK 0x8000000000000000ul + +#define EI_IEA (1 << 31) /* invalid entry address */ +#define EI_ISE (1 << 30) /* incorrect SDBT entry */ +#define EI_PRA (1 << 29) /* program request alert */ +#define EI_SACA (1 << 23) /* sampler authorization change alert */ +#define EI_LSDA (1 << 22) /* loss of sample data alert */ + +DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); + +struct hws_execute_parms { + void *buffer; + signed int rc; +}; + +DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); +EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); + +static DEFINE_MUTEX(hws_sem); +static DEFINE_MUTEX(hws_sem_oom); + +static unsigned char hws_flush_all; +static unsigned int hws_oom; +static struct workqueue_struct *hws_wq; + +static unsigned int hws_state; +enum { + HWS_INIT = 1, + HWS_DEALLOCATED, + HWS_STOPPED, + HWS_STARTED, + HWS_STOPPING }; + +/* set to 1 if called by kernel during memory allocation */ +static unsigned char oom_killer_was_active; +/* size of SDBT and SDB as of allocate API */ +static unsigned long num_sdbt = 100; +static unsigned long num_sdb = 511; +/* sampling interval (machine cycles) */ +static unsigned long interval; + +static unsigned long min_sampler_rate; +static unsigned long max_sampler_rate; + +static int ssctl(void *buffer) +{ + int cc; + + /* set in order to detect a program check */ + cc = 1; + + asm volatile( + "0: .insn s,0xB2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (buffer) + : "m" (*((struct hws_ssctl_request_block *)buffer)) + : "cc", "memory"); + + return cc ? -EINVAL : 0 ; +} + +static int qsi(void *buffer) +{ + int cc; + cc = 1; + + asm volatile( + "0: .insn s,0xB2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "=d" (cc), "+a" (buffer) + : "m" (*((struct hws_qsi_info_block *)buffer)) + : "cc", "memory"); + + return cc ? 
-EINVAL : 0; +} + +static void execute_qsi(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = qsi(ep->buffer); +} + +static void execute_ssctl(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = ssctl(ep->buffer); +} + +static int smp_ctl_ssctl_stop(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 0; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) { + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + dump_stack(); + } + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.es || cb->qsi.cs) { + printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); + dump_stack(); + } + + return rc; +} + +static int smp_ctl_ssctl_deactivate(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 1; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.cs) + printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); + + return rc; +} + +static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.h = 1; + cb->ssctl.tear = cb->first_sdbt; + cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; + cb->ssctl.interval = interval; + cb->ssctl.es = 1; + cb->ssctl.cs = 1; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + if (ep.rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); + + return rc; +} + +static int smp_ctl_qsi(int cpu) +{ + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + return ep.rc; +} + +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *)v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + +/* prototypes for external interrupt handler and worker */ +static void hws_ext_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64); + +static void worker(struct work_struct *work); + +static void add_samples_to_oprofile(unsigned cpu, unsigned long *, + unsigned long *dear); + +static void init_all_cpu_buffers(void) +{ + int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + memset(cb, 0, sizeof(struct hws_cpu_buffer)); + } +} + +static int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 
1 : 0; +} + +static unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} + +static int prepare_cpu_buffers(void) +{ + int cpu; + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + atomic_set(&cb->ext_params, 0); + cb->worker_entry = 0; + cb->sample_overflow = 0; + cb->req_alert = 0; + cb->incorrect_sdbt_entry = 0; + cb->invalid_entry_address = 0; + cb->loss_of_sample_data = 0; + cb->sample_auth_change_alert = 0; + cb->finish = 0; + cb->oom = 0; + cb->stop_mode = 0; + } + + return rc; +} + +/* + * allocate_sdbt() - allocate sampler memory + * @cpu: the cpu for which sampler memory is allocated + * + * A 4K page is allocated for each requested SDBT. + * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. + * Set ALERT_REQ mask in each SDBs trailer. + * Returns zero if successful, <0 otherwise. + */ +static int allocate_sdbt(int cpu) +{ + int j, k, rc; + unsigned long *sdbt; + unsigned long sdb; + unsigned long *tail; + unsigned long *trailer; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->first_sdbt) + return -EINVAL; + + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdbt) { + if (sdbt) + free_page((unsigned long)sdbt); + + goto allocate_sdbt_error; + } + if (cb->first_sdbt == 0) + cb->first_sdbt = (unsigned long)sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *)sdbt + 1; + + mutex_unlock(&hws_sem_oom); + + for (k = 0; k < num_sdb; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdb) { + if (sdb) + free_page(sdb); + + goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = ALERT_REQ_MASK; + sdbt++; + mutex_unlock(&hws_sem_oom); + } + tail = sdbt; + } + mutex_lock(&hws_sem_oom); + if (oom_killer_was_active) + goto allocate_sdbt_error; + + rc = 0; + if (tail) + *tail = (unsigned long) + ((void *)cb->first_sdbt) + 1; + +allocate_sdbt_exit: + mutex_unlock(&hws_sem_oom); + return rc; + +allocate_sdbt_error: + rc = -ENOMEM; + goto allocate_sdbt_exit; +} + +/* + * deallocate_sdbt() - deallocate all sampler memory + * + * For each online CPU all SDBT trees are deallocated. + * Returns the number of freed pages. + */ +static int deallocate_sdbt(void) +{ + int cpu; + int counter; + + counter = 0; + + for_each_online_cpu(cpu) { + unsigned long start; + unsigned long sdbt; + unsigned long *curr; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->first_sdbt) + continue; + + sdbt = cb->first_sdbt; + curr = (unsigned long *) sdbt; + start = sdbt; + + /* we'll free the SDBT after all SDBs are processed... 
*/ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the start */ + if ((unsigned long) curr == start) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + counter++; + } + cb->first_sdbt = 0; + } + return counter; +} + +static int start_sampling(int cpu) +{ + int rc; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); + goto start_exit; + } + + rc = -EINVAL; + if (!cb->qsi.es) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); + goto start_exit; + } + + if (!cb->qsi.cs) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); + goto start_exit; + } + + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", + cpu, interval); + + rc = 0; + +start_exit: + return rc; +} + +static int stop_sampling(int cpu) +{ + unsigned long v; + int rc; + struct hws_cpu_buffer *cb; + + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (!rc && !cb->qsi.es) + printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); + + rc = smp_ctl_ssctl_stop(cpu); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", + cpu, rc); + goto stop_exit; + } + + printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); + +stop_exit: + v = cb->req_alert; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," + " count=%lu.\n", cpu, v); + + v = cb->loss_of_sample_data; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," + " count=%lu.\n", cpu, v); + + v = cb->invalid_entry_address; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," + " count=%lu.\n", cpu, v); + + v = cb->incorrect_sdbt_entry; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Incorrect SDBT address," + " count=%lu.\n", cpu, v); + + v = cb->sample_auth_change_alert; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Sample authorization change," + " count=%lu.\n", cpu, v); + + return rc; +} + +static int check_hardware_prerequisites(void) +{ + unsigned long long facility_bits[2]; + + memcpy(facility_bits, S390_lowcore.stfle_fac_list, 32); + if (!(facility_bits[1] & (1ULL << 59))) + return -EOPNOTSUPP; + + return 0; +} +/* + * hws_oom_callback() - the OOM callback function + * + * In case the callback is invoked during memory allocation for the + * hw sampler, all obtained memory is deallocated and a flag is set + * so main sampler memory allocation can exit with a failure code. + * In case the callback is invoked during sampling the hw sampler + * is deactivated for all CPUs. 
+ */ +static int hws_oom_callback(struct notifier_block *nfb, + unsigned long dummy, void *parm) +{ + unsigned long *freed; + int cpu; + struct hws_cpu_buffer *cb; + + freed = parm; + + mutex_lock(&hws_sem_oom); + + if (hws_state == HWS_DEALLOCATED) { + /* during memory allocation */ + if (oom_killer_was_active == 0) { + oom_killer_was_active = 1; + *freed += deallocate_sdbt(); + } + } else { + int i; + cpu = get_cpu(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->oom) { + for_each_online_cpu(i) { + smp_ctl_ssctl_deactivate(i); + cb->oom = 1; + } + cb->finish = 1; + + printk(KERN_INFO + "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", + cpu); + } + } + + mutex_unlock(&hws_sem_oom); + + return NOTIFY_OK; +} + +static struct notifier_block hws_oom_notifier = { + .notifier_call = hws_oom_callback +}; + +static int __cpuinit hws_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* We do not have sampler space available for all possible CPUs. + All CPUs should be online when hw sampling is activated. */ + return NOTIFY_BAD; +} + +static struct notifier_block hws_cpu_notifier = { + .notifier_call = hws_cpu_callback +}; + +/** + * hwsampler_deactivate() - set hardware sampling temporarily inactive + * @cpu: specifies the CPU to be set inactive. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deactivate(unsigned int cpu) +{ + /* + * Deactivate hw sampling temporarily and flush the buffer + * by pushing all the pending samples to oprofile buffer. + * + * This function can be called under one of the following conditions: + * Memory unmap, task is exiting. + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.cs) { + rc = smp_ctl_ssctl_deactivate(cpu); + if (rc) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); + cb->finish = 1; + hws_state = HWS_STOPPING; + } else { + hws_flush_all = 1; + /* Add work to queue to read pending samples.*/ + queue_work_on(cpu, hws_wq, &cb->worker); + } + } + } + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + return rc; +} + +/** + * hwsampler_activate() - activate/resume hardware sampling which was deactivated + * @cpu: specifies the CPU to be set active. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_activate(unsigned int cpu) +{ + /* + * Re-activate hw sampling. This should be called in pair with + * hwsampler_deactivate(). 
+ */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (!cb->qsi.cs) { + hws_flush_all = 0; + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_ERR + "CPU %d, CPUMF activate sampling failed.\n", + cpu); + } + } + } + + mutex_unlock(&hws_sem); + + return rc; +} + +static void hws_ext_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) +{ + int cpu; + struct hws_cpu_buffer *cb; + + cpu = smp_processor_id(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + atomic_xchg( + &cb->ext_params, + atomic_read(&cb->ext_params) + | S390_lowcore.ext_params); + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} + +static int check_qsi_on_setup(void) +{ + int rc; + unsigned int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (rc) + return -EOPNOTSUPP; + + if (!cb->qsi.as) { + printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); + return -EINVAL; + } + + if (cb->qsi.es) { + printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + return -EINVAL; + + printk(KERN_INFO + "CPU %d, CPUMF Sampling stopped now.\n", cpu); + } + } + return 0; +} + +static int check_qsi_on_start(void) +{ + unsigned int cpu; + int rc; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + if (!cb->qsi.as) + return -EINVAL; + + if (cb->qsi.es) + return -EINVAL; + + if (cb->qsi.cs) + return -EINVAL; + } + return 0; +} + +static void worker_on_start(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->worker_entry = cb->first_sdbt; +} + +static int worker_check_error(unsigned int cpu, int ext_params) +{ + int rc; + unsigned long *sdbt; + struct hws_cpu_buffer *cb; + + rc = 0; + cb = &per_cpu(sampler_cpu_buffer, cpu); + sdbt = (unsigned long *) cb->worker_entry; + + if (!sdbt || !*sdbt) + return -EINVAL; + + if (ext_params & EI_IEA) + cb->req_alert++; + + if (ext_params & EI_LSDA) + cb->loss_of_sample_data++; + + if (ext_params & EI_IEA) { + cb->invalid_entry_address++; + rc = -EINVAL; + } + + if (ext_params & EI_ISE) { + cb->incorrect_sdbt_entry++; + rc = -EINVAL; + } + + if (ext_params & EI_SACA) { + cb->sample_auth_change_alert++; + rc = -EINVAL; + } + + return rc; +} + +static void worker_on_finish(unsigned int cpu) +{ + int rc, i; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->finish) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.es) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", + cpu); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", + cpu); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (!cb->finish) { + cb->finish = 1; + queue_work_on(i, hws_wq, + &cb->worker); + } + } + } + } +} + +static void worker_on_interrupt(unsigned int cpu) +{ + unsigned long *sdbt; + unsigned char done; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + sdbt = (unsigned long *) cb->worker_entry; + + done = 0; + /* do not proceed if stop was entered, + * forget the buffers not yet processed */ + while (!done && !cb->stop_mode) { + unsigned long *trailer; + struct 
hws_trailer_entry *te; + unsigned long *dear = 0; + + trailer = trailer_entry_ptr(*sdbt); + /* leave loop if no more work to do */ + if (!(*trailer & BUFFER_FULL_MASK)) { + done = 1; + if (!hws_flush_all) + continue; + } + + te = (struct hws_trailer_entry *)trailer; + cb->sample_overflow += te->overflow; + + add_samples_to_oprofile(cpu, sdbt, dear); + + /* reset trailer */ + xchg((unsigned char *) te, 0x40); + + /* advance to next sdb slot in current sdbt */ + sdbt++; + /* in case link bit is set use address w/o link bit */ + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + cb->worker_entry = (unsigned long)sdbt; + } +} + +static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, + unsigned long *dear) +{ + struct hws_data_entry *sample_data_ptr; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + if (dear) { + if (dear > trailer) + return; + trailer = dear; + } + + sample_data_ptr = (struct hws_data_entry *)(*sdbt); + + while ((unsigned long *)sample_data_ptr < trailer) { + struct pt_regs *regs = NULL; + struct task_struct *tsk = NULL; + + /* + * Check sampling mode, 1 indicates basic (=customer) sampling + * mode. + */ + if (sample_data_ptr->def != 1) { + /* sample slot is not yet written */ + break; + } else { + /* make sure we don't use it twice, + * the next time the sampler will set it again */ + sample_data_ptr->def = 0; + } + + /* Get pt_regs. */ + if (sample_data_ptr->P == 1) { + /* userspace sample */ + unsigned int pid = sample_data_ptr->prim_asn; + rcu_read_lock(); + tsk = pid_task(find_vpid(pid), PIDTYPE_PID); + if (tsk) + regs = task_pt_regs(tsk); + rcu_read_unlock(); + } else { + /* kernelspace sample */ + regs = task_pt_regs(current); + } + + mutex_lock(&hws_sem); + oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, + !sample_data_ptr->P, tsk); + mutex_unlock(&hws_sem); + + sample_data_ptr++; + } +} + +static void worker(struct work_struct *work) +{ + unsigned int cpu; + int ext_params; + struct hws_cpu_buffer *cb; + + cb = container_of(work, struct hws_cpu_buffer, worker); + cpu = smp_processor_id(); + ext_params = atomic_xchg(&cb->ext_params, 0); + + if (!cb->worker_entry) + worker_on_start(cpu); + + if (worker_check_error(cpu, ext_params)) + return; + + if (!cb->finish) + worker_on_interrupt(cpu); + + if (cb->finish) + worker_on_finish(cpu); +} + +/** + * hwsampler_allocate() - allocate memory for the hardware sampler + * @sdbt: number of SDBTs per online CPU (must be > 0) + * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) + * + * Returns 0 on success, !0 on failure. 
+ */ +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) +{ + int cpu, rc; + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_DEALLOCATED) + goto allocate_exit; + + if (sdbt < 1) + goto allocate_exit; + + if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) + goto allocate_exit; + + num_sdbt = sdbt; + num_sdb = sdb; + + oom_killer_was_active = 0; + register_oom_notifier(&hws_oom_notifier); + + for_each_online_cpu(cpu) { + if (allocate_sdbt(cpu)) { + unregister_oom_notifier(&hws_oom_notifier); + goto allocate_error; + } + } + unregister_oom_notifier(&hws_oom_notifier); + if (oom_killer_was_active) + goto allocate_error; + + hws_state = HWS_STOPPED; + rc = 0; + +allocate_exit: + mutex_unlock(&hws_sem); + return rc; + +allocate_error: + rc = -ENOMEM; + printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); + goto allocate_exit; +} + +/** + * hwsampler_deallocate() - deallocate hardware sampler memory + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deallocate() +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto deallocate_exit; + + smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + deallocate_sdbt(); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +deallocate_exit: + mutex_unlock(&hws_sem); + + return rc; +} + +long hwsampler_query_min_interval(void) +{ + if (min_sampler_rate) + return min_sampler_rate; + else + return -EINVAL; +} + +long hwsampler_query_max_interval(void) +{ + if (max_sampler_rate) + return max_sampler_rate; + else + return -EINVAL; +} + +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + return cb->sample_overflow; +} + +int hwsampler_setup() +{ + int rc; + int cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state) + goto setup_exit; + + hws_state = HWS_INIT; + + init_all_cpu_buffers(); + + rc = check_hardware_prerequisites(); + if (rc) + goto setup_exit; + + rc = check_qsi_on_setup(); + if (rc) + goto setup_exit; + + rc = -EINVAL; + hws_wq = create_workqueue("hwsampler"); + if (!hws_wq) + goto setup_exit; + + register_cpu_notifier(&hws_cpu_notifier); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + INIT_WORK(&cb->worker, worker); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (min_sampler_rate != cb->qsi.min_sampl_rate) { + if (min_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different min sampler rate values.\n"); + if (min_sampler_rate < cb->qsi.min_sampl_rate) + min_sampler_rate = + cb->qsi.min_sampl_rate; + } else + min_sampler_rate = cb->qsi.min_sampl_rate; + } + if (max_sampler_rate != cb->qsi.max_sampl_rate) { + if (max_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different max sampler rate values.\n"); + if (max_sampler_rate > cb->qsi.max_sampl_rate) + max_sampler_rate = + cb->qsi.max_sampl_rate; + } else + max_sampler_rate = cb->qsi.max_sampl_rate; + } + } + register_external_interrupt(0x1407, hws_ext_handler); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +setup_exit: + mutex_unlock(&hws_sem); + return rc; +} + +int hwsampler_shutdown() +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + + if (hws_state == HWS_STOPPED) { + smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + deallocate_sdbt(); + } + if (hws_wq) { + destroy_workqueue(hws_wq); + 
hws_wq = NULL; + } + + unregister_external_interrupt(0x1407, hws_ext_handler); + hws_state = HWS_INIT; + rc = 0; + } + mutex_unlock(&hws_sem); + + unregister_cpu_notifier(&hws_cpu_notifier); + + return rc; +} + +/** + * hwsampler_start_all() - start hardware sampling on all online CPUs + * @rate: specifies the used interval when samples are taken + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_start_all(unsigned long rate) +{ + int rc, cpu; + + mutex_lock(&hws_sem); + + hws_oom = 0; + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto start_all_exit; + + interval = rate; + + /* fail if rate is not valid */ + if (interval < min_sampler_rate || interval > max_sampler_rate) + goto start_all_exit; + + rc = check_qsi_on_start(); + if (rc) + goto start_all_exit; + + rc = prepare_cpu_buffers(); + if (rc) + goto start_all_exit; + + for_each_online_cpu(cpu) { + rc = start_sampling(cpu); + if (rc) + break; + } + if (rc) { + for_each_online_cpu(cpu) { + stop_sampling(cpu); + } + goto start_all_exit; + } + hws_state = HWS_STARTED; + rc = 0; + +start_all_exit: + mutex_unlock(&hws_sem); + + if (rc) + return rc; + + register_oom_notifier(&hws_oom_notifier); + hws_oom = 1; + hws_flush_all = 0; + /* now let them in, 1407 CPUMF external interrupts */ + smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */ + + return 0; +} + +/** + * hwsampler_stop_all() - stop hardware sampling on all online CPUs + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_stop_all() +{ + int tmp_rc, rc, cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = 0; + if (hws_state == HWS_INIT) { + mutex_unlock(&hws_sem); + return rc; + } + hws_state = HWS_STOPPING; + mutex_unlock(&hws_sem); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->stop_mode = 1; + tmp_rc = stop_sampling(cpu); + if (tmp_rc) + rc = tmp_rc; + } + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + if (hws_oom) { + unregister_oom_notifier(&hws_oom_notifier); + hws_oom = 0; + } + hws_state = HWS_STOPPED; + mutex_unlock(&hws_sem); + + return rc; +} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h new file mode 100644 index 000000000000..8c72b59316b5 --- /dev/null +++ b/arch/s390/oprofile/hwsampler.h @@ -0,0 +1,113 @@ +/* + * CPUMF HW sampler functions and internal structures + * + * Copyright IBM Corp. 
2010 + * Author(s): Heinz Graalfs + */ + +#ifndef HWSAMPLER_H_ +#define HWSAMPLER_H_ + +#include + +struct hws_qsi_info_block /* QUERY SAMPLING information block */ +{ /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: sampling authorisation control*/ + unsigned int b15_21:7; /* 15-21: zeros */ + unsigned int es:1; /* 22: sampling enable control */ + unsigned int b23_29:7; /* 23-29: zeros */ + unsigned int cs:1; /* 30: sampling activation control */ + unsigned int:1; /* 31: reserved */ + unsigned int bsdes:16; /* 4-5: size of sampling entry */ + unsigned int:16; /* 6-7: reserved */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +}; + +struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ +{ /* bytes 0 - 7 Bit(s) */ + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long b2_53:52; /* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +}; + +struct hws_cpu_buffer { + unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ + unsigned long worker_entry; + unsigned long sample_overflow; /* taken from SDB ... */ + struct hws_qsi_info_block qsi; + struct hws_ssctl_request_block ssctl; + struct work_struct worker; + atomic_t ext_params; + unsigned long req_alert; + unsigned long loss_of_sample_data; + unsigned long invalid_entry_address; + unsigned long incorrect_sdbt_entry; + unsigned long sample_auth_change_alert; + unsigned int finish:1; + unsigned int oom:1; + unsigned int stop_mode:1; +}; + +struct hws_data_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long lpp; /* Logical-Partition Program Param. */ + unsigned long long vpp; /* Virtual-Machine Program Param. 
*/
+};
+
+struct hws_trailer_entry {
+	unsigned int f:1;		/* 0 - Block Full Indicator   */
+	unsigned int a:1;		/* 1 - Alert request control  */
+	unsigned long:62;		/* 2 - 63: Reserved           */
+	unsigned long overflow;		/* 64 - sample Overflow count */
+	unsigned long timestamp;	/* 16 - time-stamp            */
+	unsigned long timestamp1;	/*                            */
+	unsigned long reserved1;	/* 32 - Reserved              */
+	unsigned long reserved2;	/*                            */
+	unsigned long progusage1;	/* 48 - reserved for programming use */
+	unsigned long progusage2;	/*                            */
+};
+
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+long hwsampler_query_min_interval(void);
+long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
+#endif /*HWSAMPLER_H_*/

From 997dbb4967da248808850c250182ef2528fff2d1 Mon Sep 17 00:00:00 2001
From: Heinz Graalfs
Date: Fri, 21 Jan 2011 10:06:53 +0000
Subject: [PATCH 03/30] oprofile, s390: Enhance OProfile to support System z's hardware sampling feature

OProfile is enhanced to export all files for controlling System z's
hardware sampling, and to invoke the hwsampler exported functions to
initialize and use System z's hardware sampling.

The patch invokes hwsampler_setup() during oprofile init and exports
the following hwsampler files under oprofilefs if hwsampler's setup
succeeded:

A new directory for hardware sampling based files:

  /dev/oprofile/hwsampling/

The userland daemon must explicitly write to the following files:

to disable (or enable) hardware based sampling
  /dev/oprofile/hwsampling/hwsampler

to modify the actual sampling rate
  /dev/oprofile/hwsampling/hw_interval

to modify the amount of sampling memory (measured in 4K pages)
  /dev/oprofile/hwsampling/hw_sdbt_blocks

The following files are read only and show:

the possible minimum sampling rate
  /dev/oprofile/hwsampling/hw_min_interval

the possible maximum sampling rate
  /dev/oprofile/hwsampling/hw_max_interval

The patch splits the oprofile_timer_[init/exit] function so that it can
also be called from user context (oprofilefs) to avoid a kernel oops.
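As a usage illustration (not part of the patch), a daemon could drive the files listed above with plain file I/O. The helper below is hypothetical; 4096 is simply the default interval defined by this patch, and only standard POSIX calls are used.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: write one value to an oprofilefs control file. */
static int oprofilefs_write(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* Pick a sampling interval, then enable hardware based sampling. */
	oprofilefs_write("/dev/oprofile/hwsampling/hw_interval", "4096");
	oprofilefs_write("/dev/oprofile/hwsampling/hwsampler", "1");

	/* ... run the profiling session via the usual oprofile controls ... */

	/* Switch back to timer based sampling. */
	oprofilefs_write("/dev/oprofile/hwsampling/hwsampler", "0");
	return 0;
}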
Applied with following changes: * whitespace changes in Makefile and timer_int.c Signed-off-by: Mahesh Salgaonkar Signed-off-by: Maran Pakkirisamy Signed-off-by: Heinz Graalfs Acked-by: Heiko Carstens Signed-off-by: Robert Richter --- arch/s390/oprofile/Makefile | 2 +- arch/s390/oprofile/hwsampler_files.c | 146 +++++++++++++++++++++++++++ arch/s390/oprofile/init.c | 7 +- drivers/oprofile/oprof.c | 32 ++++++ drivers/oprofile/oprof.h | 2 + drivers/oprofile/timer_int.c | 15 ++- include/linux/oprofile.h | 21 ++++ 7 files changed, 220 insertions(+), 5 deletions(-) create mode 100644 arch/s390/oprofile/hwsampler_files.c diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index d698cddcfbdd..00d8fc8e4429 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,4 +6,4 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o hwsampler.o +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o hwsampler.o hwsampler_files.o diff --git a/arch/s390/oprofile/hwsampler_files.c b/arch/s390/oprofile/hwsampler_files.c new file mode 100644 index 000000000000..493f7cc4e861 --- /dev/null +++ b/arch/s390/oprofile/hwsampler_files.c @@ -0,0 +1,146 @@ +/** + * arch/s390/oprofile/hwsampler_files.c + * + * Copyright IBM Corp. 2010 + * Author: Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) + */ +#include +#include +#include + +#include "hwsampler.h" + +#define DEFAULT_INTERVAL 4096 + +#define DEFAULT_SDBT_BLOCKS 1 +#define DEFAULT_SDB_BLOCKS 511 + +static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; +static unsigned long oprofile_min_interval; +static unsigned long oprofile_max_interval; + +static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; +static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; + +static unsigned long oprofile_hwsampler; + +static int oprofile_hwsampler_start(void) +{ + int retval; + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + +static void oprofile_hwsampler_stop(void) +{ + hwsampler_stop_all(); + hwsampler_deallocate(); + return; +} + +int oprofile_arch_set_hwsampler(struct oprofile_operations *ops) +{ + printk(KERN_INFO "oprofile: using hardware sampling\n"); + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + ops->cpu_type = "timer"; + + return 0; +} + +static ssize_t hwsampler_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hwsampler, buf, count, offset); +} + +static ssize_t hwsampler_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (oprofile_hwsampler == val) + return -EINVAL; + + retval = oprofile_set_hwsampler(val); + + if (retval) + return retval; + + oprofile_hwsampler = val; + return count; +} + +static const struct file_operations hwsampler_fops = { + .read = hwsampler_read, + .write = hwsampler_write, +}; + +static int oprofile_create_hwsampling_files(struct super_block *sb, + struct dentry *root) +{ + struct dentry *hw_dir; + + /* reinitialize default values */ + oprofile_hwsampler = 1; + + hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if 
(!hw_dir) + return -EINVAL; + + oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); + oprofilefs_create_ulong(sb, hw_dir, "hw_interval", + &oprofile_hw_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + + return 0; +} + +int oprofile_hwsampler_init(struct oprofile_operations* ops) +{ + if (hwsampler_setup()) + return -ENODEV; + + /* + * create hwsampler files only if hwsampler_setup() succeeds. + */ + ops->create_files = oprofile_create_hwsampling_files; + oprofile_min_interval = hwsampler_query_min_interval(); + if (oprofile_min_interval < 0) { + oprofile_min_interval = 0; + return -ENODEV; + } + oprofile_max_interval = hwsampler_query_max_interval(); + if (oprofile_max_interval < 0) { + oprofile_max_interval = 0; + return -ENODEV; + } + oprofile_arch_set_hwsampler(ops); + return 0; +} + +void oprofile_hwsampler_exit(void) +{ + hwsampler_shutdown(); +} diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 7a995113b918..f6b3f724f590 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -11,16 +11,21 @@ #include #include #include +#include +extern int oprofile_hwsampler_init(struct oprofile_operations* ops); +extern void oprofile_hwsampler_exit(void); extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); int __init oprofile_arch_init(struct oprofile_operations* ops) { ops->backtrace = s390_backtrace; - return -ENODEV; + + return oprofile_hwsampler_init(ops); } void oprofile_arch_exit(void) { + oprofile_hwsampler_exit(); } diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f9bda64fcd1b..43b01daa91e1 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -239,6 +239,38 @@ int oprofile_set_ulong(unsigned long *addr, unsigned long val) return err; } +#ifdef CONFIG_HAVE_HWSAMPLER +int oprofile_set_hwsampler(unsigned long val) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + switch (val) { + case 1: + /* Switch to hardware sampling. 
*/ + __oprofile_timer_exit(); + err = oprofile_arch_set_hwsampler(&oprofile_ops); + break; + case 0: + printk(KERN_INFO "oprofile: using timer interrupt.\n"); + err = __oprofile_timer_init(&oprofile_ops); + break; + default: + err = -EINVAL; + } + +out: + mutex_unlock(&start_mutex); + return err; +} +#endif /* CONFIG_HAVE_HWSAMPLER */ + static int __init oprofile_init(void) { int err; diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 177b73de5e5f..5a6ceb1954a2 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -35,7 +35,9 @@ struct dentry; void oprofile_create_files(struct super_block *sb, struct dentry *root); int oprofile_timer_init(struct oprofile_operations *ops); +int __oprofile_timer_init(struct oprofile_operations *ops); void oprofile_timer_exit(void); +void __oprofile_timer_exit(void); int oprofile_set_ulong(unsigned long *addr, unsigned long val); int oprofile_set_timeout(unsigned long time); diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 010725117dbb..0099a458fd37 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -97,14 +97,13 @@ static struct notifier_block __refdata oprofile_cpu_notifier = { .notifier_call = oprofile_cpu_notify, }; -int __init oprofile_timer_init(struct oprofile_operations *ops) +int __oprofile_timer_init(struct oprofile_operations *ops) { int rc; rc = register_hotcpu_notifier(&oprofile_cpu_notifier); if (rc) return rc; - ops->create_files = NULL; ops->setup = NULL; ops->shutdown = NULL; ops->start = oprofile_hrtimer_start; @@ -113,7 +112,17 @@ int __init oprofile_timer_init(struct oprofile_operations *ops) return 0; } -void __exit oprofile_timer_exit(void) +int __init oprofile_timer_init(struct oprofile_operations *ops) +{ + return __oprofile_timer_init(ops); +} + +void __oprofile_timer_exit(void) { unregister_hotcpu_notifier(&oprofile_cpu_notifier); } + +void __exit oprofile_timer_exit(void) +{ + __oprofile_timer_exit(); +} diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 7f5cfd3b37dd..b517d869e1ad 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -91,6 +91,27 @@ int oprofile_arch_init(struct oprofile_operations * ops); */ void oprofile_arch_exit(void); +#ifdef CONFIG_HAVE_HWSAMPLER +/** + * setup hardware sampler for oprofiling. + */ + +int oprofile_set_hwsampler(unsigned long); + +/** + * hardware sampler module initialization for the s390 arch + */ + +int oprofile_arch_set_hwsampler(struct oprofile_operations *ops); + +/** + * Add an s390 hardware sample. + */ +void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel, + struct task_struct *task); +#endif /* CONFIG_HAVE_HWSAMPLER */ + /** * Add a sample. This may be called from any context. */ From a0d76247e07abd14968adc4486aaa8e270e9c209 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 11 Feb 2011 17:31:44 +0100 Subject: [PATCH 04/30] oprofile, s390: Rework hwsampler implementation This patch is a rework of the hwsampler oprofile implementation that has been applied recently. Now there are less non-architectural changes. The only changes are: * introduction of oprofile_add_ext_hw_sample(), and * removal of section attributes of oprofile_timer_init/_exit(). To setup hwsampler for oprofile we need to modify start()/stop() callbacks and additional hwsampler control files in oprofilefs. 
We do not reinitialize the timer or hwsampler mode by restarting calling init/exit() anymore, instead hwsampler_running is used to switch the mode directly in oprofile_hwsampler_start/_stop(). For locking reasons there is also hwsampler_file that reflects the value in oprofilefs. The overall diffstat of the oprofile s390 hwsampler implemenation shows the low impact to non-architectural code: arch/Kconfig | 3 + arch/s390/Kconfig | 1 + arch/s390/oprofile/Makefile | 2 +- arch/s390/oprofile/hwsampler.c | 1256 ++++++++++++++++++++++++++++++++++ arch/s390/oprofile/hwsampler.h | 113 +++ arch/s390/oprofile/hwsampler_files.c | 162 +++++ arch/s390/oprofile/init.c | 6 +- drivers/oprofile/cpu_buffer.c | 24 +- drivers/oprofile/timer_int.c | 4 +- include/linux/oprofile.h | 7 + 10 files changed, 1567 insertions(+), 11 deletions(-) Acked-by: Heiko Carstens Signed-off-by: Robert Richter --- arch/s390/oprofile/hwsampler_files.c | 60 ++++++++++++++++++---------- arch/s390/oprofile/init.c | 1 - drivers/oprofile/oprof.c | 32 --------------- drivers/oprofile/oprof.h | 2 - drivers/oprofile/timer_int.c | 15 ++----- include/linux/oprofile.h | 21 ---------- 6 files changed, 41 insertions(+), 90 deletions(-) diff --git a/arch/s390/oprofile/hwsampler_files.c b/arch/s390/oprofile/hwsampler_files.c index 493f7cc4e861..2e1da2449ba9 100644 --- a/arch/s390/oprofile/hwsampler_files.c +++ b/arch/s390/oprofile/hwsampler_files.c @@ -8,6 +8,7 @@ #include #include +#include "../../../drivers/oprofile/oprof.h" #include "hwsampler.h" #define DEFAULT_INTERVAL 4096 @@ -22,12 +23,20 @@ static unsigned long oprofile_max_interval; static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; -static unsigned long oprofile_hwsampler; +static int hwsampler_file; +static int hwsampler_running; /* start_mutex must be held to change */ + +static struct oprofile_operations timer_ops; static int oprofile_hwsampler_start(void) { int retval; + hwsampler_running = hwsampler_file; + + if (!hwsampler_running) + return timer_ops.start(); + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); if (retval) return retval; @@ -41,25 +50,20 @@ static int oprofile_hwsampler_start(void) static void oprofile_hwsampler_stop(void) { + if (!hwsampler_running) { + timer_ops.stop(); + return; + } + hwsampler_stop_all(); hwsampler_deallocate(); return; } -int oprofile_arch_set_hwsampler(struct oprofile_operations *ops) -{ - printk(KERN_INFO "oprofile: using hardware sampling\n"); - ops->start = oprofile_hwsampler_start; - ops->stop = oprofile_hwsampler_stop; - ops->cpu_type = "timer"; - - return 0; -} - static ssize_t hwsampler_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(oprofile_hwsampler, buf, count, offset); + return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); } static ssize_t hwsampler_write(struct file *file, char const __user *buf, @@ -75,15 +79,16 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, if (retval) return retval; - if (oprofile_hwsampler == val) - return -EINVAL; + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; - retval = oprofile_set_hwsampler(val); + hwsampler_file = val; - if (retval) - return retval; - - oprofile_hwsampler = val; return count; } @@ -98,7 +103,7 @@ static int oprofile_create_hwsampling_files(struct super_block *sb, struct dentry 
*hw_dir; /* reinitialize default values */ - oprofile_hwsampler = 1; + hwsampler_file = 1; hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); if (!hw_dir) @@ -125,7 +130,6 @@ int oprofile_hwsampler_init(struct oprofile_operations* ops) /* * create hwsampler files only if hwsampler_setup() succeeds. */ - ops->create_files = oprofile_create_hwsampling_files; oprofile_min_interval = hwsampler_query_min_interval(); if (oprofile_min_interval < 0) { oprofile_min_interval = 0; @@ -136,11 +140,23 @@ int oprofile_hwsampler_init(struct oprofile_operations* ops) oprofile_max_interval = 0; return -ENODEV; } - oprofile_arch_set_hwsampler(ops); + + if (oprofile_timer_init(ops)) + return -ENODEV; + + printk(KERN_INFO "oprofile: using hardware sampling\n"); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + ops->create_files = oprofile_create_hwsampling_files; + return 0; } void oprofile_hwsampler_exit(void) { + oprofile_timer_exit(); hwsampler_shutdown(); } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index f6b3f724f590..059b44b9f171 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -11,7 +11,6 @@ #include #include #include -#include extern int oprofile_hwsampler_init(struct oprofile_operations* ops); extern void oprofile_hwsampler_exit(void); diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 43b01daa91e1..f9bda64fcd1b 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -239,38 +239,6 @@ int oprofile_set_ulong(unsigned long *addr, unsigned long val) return err; } -#ifdef CONFIG_HAVE_HWSAMPLER -int oprofile_set_hwsampler(unsigned long val) -{ - int err = 0; - - mutex_lock(&start_mutex); - - if (oprofile_started) { - err = -EBUSY; - goto out; - } - - switch (val) { - case 1: - /* Switch to hardware sampling. 
*/ - __oprofile_timer_exit(); - err = oprofile_arch_set_hwsampler(&oprofile_ops); - break; - case 0: - printk(KERN_INFO "oprofile: using timer interrupt.\n"); - err = __oprofile_timer_init(&oprofile_ops); - break; - default: - err = -EINVAL; - } - -out: - mutex_unlock(&start_mutex); - return err; -} -#endif /* CONFIG_HAVE_HWSAMPLER */ - static int __init oprofile_init(void) { int err; diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 5a6ceb1954a2..177b73de5e5f 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -35,9 +35,7 @@ struct dentry; void oprofile_create_files(struct super_block *sb, struct dentry *root); int oprofile_timer_init(struct oprofile_operations *ops); -int __oprofile_timer_init(struct oprofile_operations *ops); void oprofile_timer_exit(void); -void __oprofile_timer_exit(void); int oprofile_set_ulong(unsigned long *addr, unsigned long val); int oprofile_set_timeout(unsigned long time); diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 0099a458fd37..3ef44624f510 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -97,13 +97,14 @@ static struct notifier_block __refdata oprofile_cpu_notifier = { .notifier_call = oprofile_cpu_notify, }; -int __oprofile_timer_init(struct oprofile_operations *ops) +int oprofile_timer_init(struct oprofile_operations *ops) { int rc; rc = register_hotcpu_notifier(&oprofile_cpu_notifier); if (rc) return rc; + ops->create_files = NULL; ops->setup = NULL; ops->shutdown = NULL; ops->start = oprofile_hrtimer_start; @@ -112,17 +113,7 @@ int __oprofile_timer_init(struct oprofile_operations *ops) return 0; } -int __init oprofile_timer_init(struct oprofile_operations *ops) -{ - return __oprofile_timer_init(ops); -} - -void __oprofile_timer_exit(void) +void oprofile_timer_exit(void) { unregister_hotcpu_notifier(&oprofile_cpu_notifier); } - -void __exit oprofile_timer_exit(void) -{ - __oprofile_timer_exit(); -} diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index b517d869e1ad..7f5cfd3b37dd 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -91,27 +91,6 @@ int oprofile_arch_init(struct oprofile_operations * ops); */ void oprofile_arch_exit(void); -#ifdef CONFIG_HAVE_HWSAMPLER -/** - * setup hardware sampler for oprofiling. - */ - -int oprofile_set_hwsampler(unsigned long); - -/** - * hardware sampler module initialization for the s390 arch - */ - -int oprofile_arch_set_hwsampler(struct oprofile_operations *ops); - -/** - * Add an s390 hardware sample. - */ -void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs, - unsigned long event, int is_kernel, - struct task_struct *task); -#endif /* CONFIG_HAVE_HWSAMPLER */ - /** * Add a sample. This may be called from any context. 
*/ From 7bb2e269aefe4539313922ab4a89367cd52a51d1 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 14 Feb 2011 19:08:33 +0100 Subject: [PATCH 05/30] oprofile, s390: Fix section mismatch of function hws_cpu_callback() Fixes the following section mismatch: Section mismatch in reference from the variable hws_cpu_notifier to the function .cpuinit.text:hws_cpu_callback() Acked-by: Heiko Carstens Signed-off-by: Robert Richter --- arch/s390/oprofile/hwsampler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index ab3f770e35ba..3d48f4db246d 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -578,7 +578,7 @@ static struct notifier_block hws_oom_notifier = { .notifier_call = hws_oom_callback }; -static int __cpuinit hws_cpu_callback(struct notifier_block *nfb, +static int hws_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { /* We do not have sampler space available for all possible CPUs. From ec52d9765a0f3603c62b4238482bf38897e4d42f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Mar 2011 10:11:48 -0300 Subject: [PATCH 06/30] perf top: Remove redundant perf_top->sym_counter We can get that counter index from perf_top->sym_evsel->idx instead. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 11 ++++++----- tools/perf/util/top.c | 2 +- tools/perf/util/top.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 80c9e062bd5b..4bf6e02c2b2a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -515,24 +515,25 @@ static void handle_keypress(struct perf_session *session, int c) break; case 'E': if (top.evlist->nr_entries > 1) { + int counter; fprintf(stderr, "\nAvailable events:"); list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel)); - prompt_integer(&top.sym_counter, "Enter details event counter"); + prompt_integer(&counter, "Enter details event counter"); - if (top.sym_counter >= top.evlist->nr_entries) { + if (counter >= top.evlist->nr_entries) { top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); - top.sym_counter = 0; fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel)); sleep(1); break; } list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) - if (top.sym_evsel->idx == top.sym_counter) + if (top.sym_evsel->idx == counter) break; - } else top.sym_counter = 0; + } else + top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); break; case 'f': prompt_integer(&top.count_filter, "Enter display event count filter"); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 75cfe4d45119..fcfb77762819 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -171,7 +171,7 @@ float perf_top__decay_samples(struct perf_top *top, struct rb_root *root) { struct sym_entry *syme, *n; float sum_ksamples = 0.0; - int snap = !top->display_weighted ? top->sym_counter : 0, j; + int snap = !top->display_weighted ? 
top->sym_evsel->idx : 0, j; /* Sort the active symbols */ pthread_mutex_lock(&top->active_symbols_lock); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 96d1cb78af01..96a78312c719 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -41,7 +41,7 @@ struct perf_top { u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; - int display_weighted, freq, rb_entries, sym_counter; + int display_weighted, freq, rb_entries; pid_t target_pid, target_tid; bool hide_kernel_symbols, hide_user_symbols, zero; const char *cpu_list; From 8b8ba4a9a5b04916858f79cee71873f973931649 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Mar 2011 12:38:48 -0300 Subject: [PATCH 07/30] perf top: Remove redundant syme->origin field We can get it from syme->map->dso->kernel (that should be renamed to origin, but leave this for another patch). Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 1 - tools/perf/util/top.c | 4 ++-- tools/perf/util/top.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4bf6e02c2b2a..4976400a1438 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -782,7 +782,6 @@ static void perf_event__process_sample(const union perf_event *event, if (!syme->skip) { struct perf_evsel *evsel; - syme->origin = origin; evsel = perf_evlist__id2evsel(top.evlist, sample->id); assert(evsel != NULL); syme->count[evsel->idx]++; diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index fcfb77762819..a11f60735a18 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -184,9 +184,9 @@ float perf_top__decay_samples(struct perf_top *top, struct rb_root *root) if (syme->snap_count != 0) { if ((top->hide_user_symbols && - syme->origin == PERF_RECORD_MISC_USER) || + syme->map->dso->kernel == DSO_TYPE_USER) || (top->hide_kernel_symbols && - syme->origin == PERF_RECORD_MISC_KERNEL)) { + syme->map->dso->kernel == DSO_TYPE_KERNEL)) { perf_top__remove_active_sym(top, syme); continue; } diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 96a78312c719..ba111b25e16a 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -17,7 +17,6 @@ struct sym_entry { unsigned long snap_count; double weight; int skip; - u8 origin; struct map *map; unsigned long count[0]; }; From 878b439dccd064d6908800fab0b47bd3c3a87ebb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Mar 2011 13:13:36 -0300 Subject: [PATCH 08/30] perf symbols: Rename dso->origin to dso->symtab_type And the DSO__ORIG_ enum to SYMTAB__, to clarify that this is about from where the symtab was obtained. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 4 +-- tools/perf/util/annotate.c | 2 +- tools/perf/util/symbol.c | 64 +++++++++++++++++++------------------- tools/perf/util/symbol.h | 26 ++++++++-------- 4 files changed, 48 insertions(+), 48 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4976400a1438..31ea7a68baa4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -152,7 +152,7 @@ static int parse_source(struct sym_entry *syme) /* * We can't annotate with just /proc/kallsyms */ - if (map->dso->origin == DSO__ORIG_KERNEL) { + if (map->dso->symtab_type == SYMTAB__KALLSYMS) { pr_err("Can't annotate %s: No vmlinux file was found in the " "path\n", sym->name); sleep(1); @@ -769,7 +769,7 @@ static void perf_event__process_sample(const union perf_event *event, struct symbol *sym = sym_entry__symbol(top.sym_filter_entry); pr_err("Can't annotate %s", sym->name); - if (top.sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { + if (top.sym_filter_entry->map->dso->symtab_type == SYMTAB__KALLSYMS) { pr_err(": No vmlinux file was found in the path:\n"); machine__fprintf_vmlinux_path(machine, stderr); } else diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0d0830c98cd7..e01af2b1a469 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -294,7 +294,7 @@ fallback: free_filename = false; } - if (dso->origin == DSO__ORIG_KERNEL) { + if (dso->symtab_type == SYMTAB__KALLSYMS) { char bf[BUILD_ID_SIZE * 2 + 16] = " with build id "; char *build_id_msg = NULL; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 00014e32c288..651dbfe7f4f3 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -207,7 +207,7 @@ struct dso *dso__new(const char *name) dso__set_short_name(self, self->name); for (i = 0; i < MAP__NR_TYPES; ++i) self->symbols[i] = self->symbol_names[i] = RB_ROOT; - self->origin = DSO__ORIG_NOT_FOUND; + self->symtab_type = SYMTAB__NOT_FOUND; self->loaded = 0; self->sorted_by_name = 0; self->has_build_id = 0; @@ -680,9 +680,9 @@ int dso__load_kallsyms(struct dso *self, const char *filename, return -1; if (self->kernel == DSO_TYPE_GUEST_KERNEL) - self->origin = DSO__ORIG_GUEST_KERNEL; + self->symtab_type = SYMTAB__GUEST_KALLSYMS; else - self->origin = DSO__ORIG_KERNEL; + self->symtab_type = SYMTAB__KALLSYMS; return dso__split_kallsyms(self, map, filter); } @@ -1204,7 +1204,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, } curr_map->map_ip = identity__map_ip; curr_map->unmap_ip = identity__map_ip; - curr_dso->origin = self->origin; + curr_dso->symtab_type = self->symtab_type; map_groups__insert(kmap->kmaps, curr_map); dsos__add(&self->node, curr_dso); dso__set_loaded(curr_dso, map->type); @@ -1430,21 +1430,21 @@ out: char dso__symtab_origin(const struct dso *self) { static const char origin[] = { - [DSO__ORIG_KERNEL] = 'k', - [DSO__ORIG_JAVA_JIT] = 'j', - [DSO__ORIG_BUILD_ID_CACHE] = 'B', - [DSO__ORIG_FEDORA] = 'f', - [DSO__ORIG_UBUNTU] = 'u', - [DSO__ORIG_BUILDID] = 'b', - [DSO__ORIG_DSO] = 'd', - [DSO__ORIG_KMODULE] = 'K', - [DSO__ORIG_GUEST_KERNEL] = 'g', - [DSO__ORIG_GUEST_KMODULE] = 'G', + [SYMTAB__KALLSYMS] = 'k', + [SYMTAB__JAVA_JIT] = 'j', + [SYMTAB__BUILD_ID_CACHE] = 'B', + [SYMTAB__FEDORA_DEBUGINFO] = 'f', + [SYMTAB__UBUNTU_DEBUGINFO] = 'u', + 
[SYMTAB__BUILDID_DEBUGINFO] = 'b', + [SYMTAB__SYSTEM_PATH_DSO] = 'd', + [SYMTAB__SYSTEM_PATH_KMODULE] = 'K', + [SYMTAB__GUEST_KALLSYMS] = 'g', + [SYMTAB__GUEST_KMODULE] = 'G', }; - if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) + if (self == NULL || self->symtab_type == SYMTAB__NOT_FOUND) return '!'; - return origin[self->origin]; + return origin[self->symtab_type]; } int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) @@ -1477,8 +1477,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) if (strncmp(self->name, "/tmp/perf-", 10) == 0) { ret = dso__load_perf_map(self, map, filter); - self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : - DSO__ORIG_NOT_FOUND; + self->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT : + SYMTAB__NOT_FOUND; return ret; } @@ -1486,26 +1486,26 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) * On the first pass, only load images if they have a full symtab. * Failing that, do a second pass where we accept .dynsym also */ - for (self->origin = DSO__ORIG_BUILD_ID_CACHE, want_symtab = 1; - self->origin != DSO__ORIG_NOT_FOUND; - self->origin++) { - switch (self->origin) { - case DSO__ORIG_BUILD_ID_CACHE: + for (self->symtab_type = SYMTAB__BUILD_ID_CACHE, want_symtab = 1; + self->symtab_type != SYMTAB__NOT_FOUND; + self->symtab_type++) { + switch (self->symtab_type) { + case SYMTAB__BUILD_ID_CACHE: /* skip the locally configured cache if a symfs is given */ if (symbol_conf.symfs[0] || (dso__build_id_filename(self, name, size) == NULL)) { continue; } break; - case DSO__ORIG_FEDORA: + case SYMTAB__FEDORA_DEBUGINFO: snprintf(name, size, "%s/usr/lib/debug%s.debug", symbol_conf.symfs, self->long_name); break; - case DSO__ORIG_UBUNTU: + case SYMTAB__UBUNTU_DEBUGINFO: snprintf(name, size, "%s/usr/lib/debug%s", symbol_conf.symfs, self->long_name); break; - case DSO__ORIG_BUILDID: { + case SYMTAB__BUILDID_DEBUGINFO: { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; if (!self->has_build_id) @@ -1519,11 +1519,11 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) symbol_conf.symfs, build_id_hex, build_id_hex + 2); } break; - case DSO__ORIG_DSO: + case SYMTAB__SYSTEM_PATH_DSO: snprintf(name, size, "%s%s", symbol_conf.symfs, self->long_name); break; - case DSO__ORIG_GUEST_KMODULE: + case SYMTAB__GUEST_KMODULE: if (map->groups && machine) root_dir = machine->root_dir; else @@ -1532,7 +1532,7 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) root_dir, self->long_name); break; - case DSO__ORIG_KMODULE: + case SYMTAB__SYSTEM_PATH_KMODULE: snprintf(name, size, "%s%s", symbol_conf.symfs, self->long_name); break; @@ -1544,7 +1544,7 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) */ if (want_symtab) { want_symtab = 0; - self->origin = DSO__ORIG_BUILD_ID_CACHE; + self->symtab_type = SYMTAB__BUILD_ID_CACHE; } else continue; } @@ -1757,9 +1757,9 @@ struct map *machine__new_module(struct machine *self, u64 start, return NULL; if (machine__is_host(self)) - dso->origin = DSO__ORIG_KMODULE; + dso->symtab_type = SYMTAB__SYSTEM_PATH_KMODULE; else - dso->origin = DSO__ORIG_GUEST_KMODULE; + dso->symtab_type = SYMTAB__GUEST_KMODULE; map_groups__insert(&self->kmaps, map); return map; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 4d7ed09fe332..db39c0c63608 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -137,7 +137,7 @@ struct dso { u8 annotate_warned:1; u8 sname_alloc:1; u8 lname_alloc:1; - unsigned char origin; + 
unsigned char symtab_type; u8 sorted_by_name; u8 loaded; u8 build_id[BUILD_ID_SIZE]; @@ -188,18 +188,18 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp); size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp); size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); -enum dso_origin { - DSO__ORIG_KERNEL = 0, - DSO__ORIG_GUEST_KERNEL, - DSO__ORIG_JAVA_JIT, - DSO__ORIG_BUILD_ID_CACHE, - DSO__ORIG_FEDORA, - DSO__ORIG_UBUNTU, - DSO__ORIG_BUILDID, - DSO__ORIG_DSO, - DSO__ORIG_GUEST_KMODULE, - DSO__ORIG_KMODULE, - DSO__ORIG_NOT_FOUND, +enum symtab_type { + SYMTAB__KALLSYMS = 0, + SYMTAB__GUEST_KALLSYMS, + SYMTAB__JAVA_JIT, + SYMTAB__BUILD_ID_CACHE, + SYMTAB__FEDORA_DEBUGINFO, + SYMTAB__UBUNTU_DEBUGINFO, + SYMTAB__BUILDID_DEBUGINFO, + SYMTAB__SYSTEM_PATH_DSO, + SYMTAB__GUEST_KMODULE, + SYMTAB__SYSTEM_PATH_KMODULE, + SYMTAB__NOT_FOUND, }; char dso__symtab_origin(const struct dso *self); From 171b3be9c42e97cd4530706654242f6a3efb6ac3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Mar 2011 13:36:01 -0300 Subject: [PATCH 09/30] perf symbol: Move sym_entry->skip to symbol->ignore While going thru each of the sym_entry fields looking to reduce it to the set of entries needed when in an active symbols list, 'skip' should really be in symbol, as we set it when loading the symtab. And the space used by the basic symbol allocation remains the same as we had 5 bytes of padding. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 4 ++-- tools/perf/util/symbol.h | 5 +++++ tools/perf/util/top.h | 1 - 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 31ea7a68baa4..70f1075cc5b0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -676,7 +676,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { - syme->skip = 1; + sym->ignore = true; break; } } @@ -779,7 +779,7 @@ static void perf_event__process_sample(const union perf_event *event, } syme = symbol__priv(al.sym); - if (!syme->skip) { + if (!al.sym->ignore) { struct perf_evsel *evsel; evsel = perf_evlist__id2evsel(top.evlist, sample->id); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index db39c0c63608..713b0b40cc4a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -48,12 +48,17 @@ char *strxfrchar(char *s, char from, char to); #define BUILD_ID_SIZE 20 +/** struct symbol - symtab entry + * + * @ignore - resolvable but tools ignore it (e.g. idle routines) + */ struct symbol { struct rb_node rb_node; u64 start; u64 end; u16 namelen; u8 binding; + bool ignore; char name[0]; }; diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index ba111b25e16a..bfbf95bcc603 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -16,7 +16,6 @@ struct sym_entry { struct list_head node; unsigned long snap_count; double weight; - int skip; struct map *map; unsigned long count[0]; }; From cfd748ae066e776d45bdce550b47cd00c67d55de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 14 Mar 2011 16:40:30 +0100 Subject: [PATCH 10/30] perf stat: Provide support for filters Now the --filter option is usable with perf stat too. 
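A minimal usage sketch (the event and the filter expression below are illustrative only, not taken from this patch set): the filter is an ftrace-style expression attached to the tracepoint event given with -e, e.g.

  perf stat -e sched:sched_switch --filter 'prev_pid == 0' -a sleep 1

which would count only sched_switch events whose prev_pid field is 0.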
Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <1300117230-8404-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 21c025222496..e2109f9b43eb 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -333,6 +333,12 @@ static int run_perf_stat(int argc __used, const char **argv) } } + if (perf_evlist__set_filters(evsel_list)) { + error("failed to set filter with %d (%s)\n", errno, + strerror(errno)); + return -1; + } + /* * Enable counters and exec the command: */ @@ -634,6 +640,8 @@ static const struct option options[] = { OPT_CALLBACK('e', "event", &evsel_list, "event", "event selector. use 'perf list' to list available events", parse_events), + OPT_CALLBACK(0, "filter", &evsel_list, "filter", + "event filter", parse_filter), OPT_BOOLEAN('i', "no-inherit", &no_inherit, "child tasks do not inherit counters"), OPT_INTEGER('p', "pid", &target_pid, From be6d842a65babc54e2b204b382df2529e304be48 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Mar 2011 22:23:23 -0700 Subject: [PATCH 11/30] perf script: Change process_event prototype Prepare for handling of samples for any event type. Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1299734608-5223-2-git-send-email-daahern@cisco.com> Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 25 ++++++++++++------- .../util/scripting-engines/trace-event-perl.c | 11 +++++--- .../scripting-engines/trace-event-python.c | 11 +++++--- tools/perf/util/trace-event-scripting.c | 9 +++---- tools/perf/util/trace-event.h | 7 ++++-- 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5f40df635dcb..b2bdd5534026 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -20,6 +20,20 @@ static u64 last_timestamp; static u64 nr_unordered; extern const struct option record_options[]; +static void process_event(union perf_event *event __unused, + struct perf_sample *sample, + struct perf_session *session __unused, + struct thread *thread) +{ + /* + * FIXME: better resolve from pid from the struct trace_entry + * field, although it should be the same than this perf + * event pid + */ + print_event(sample->cpu, sample->raw_data, sample->raw_size, + sample->time, thread->comm); +} + static int default_start_script(const char *script __unused, int argc __unused, const char **argv __unused) @@ -40,7 +54,7 @@ static int default_generate_script(const char *outfile __unused) static struct scripting_ops default_scripting_ops = { .start_script = default_start_script, .stop_script = default_stop_script, - .process_event = print_event, + .process_event = process_event, .generate_script = default_generate_script, }; @@ -86,14 +100,7 @@ static int process_sample_event(union perf_event *event, last_timestamp = sample->time; return 0; } - /* - * FIXME: better resolve from pid from the struct trace_entry - * field, although it should be the same than this perf - * event pid - */ - scripting_ops->process_event(sample->cpu, sample->raw_data, - sample->raw_size, - sample->time, thread->comm); + scripting_ops->process_event(event, sample, session, thread); } 
session->hists.stats.total_period += sample->period; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 93680818e244..621427212e86 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -245,9 +245,10 @@ static inline struct event *find_cache_event(int type) return event; } -static void perl_process_event(int cpu, void *data, - int size __unused, - unsigned long long nsecs, char *comm) +static void perl_process_event(union perf_event *pevent __unused, + struct perf_sample *sample, + struct perf_session *session __unused, + struct thread *thread) { struct format_field *field; static char handler[256]; @@ -256,6 +257,10 @@ static void perl_process_event(int cpu, void *data, struct event *event; int type; int pid; + int cpu = sample->cpu; + void *data = sample->raw_data; + unsigned long long nsecs = sample->time; + char *comm = thread->comm; dSP; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 2040b8538527..1b85d6055159 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -204,9 +204,10 @@ static inline struct event *find_cache_event(int type) return event; } -static void python_process_event(int cpu, void *data, - int size __unused, - unsigned long long nsecs, char *comm) +static void python_process_event(union perf_event *pevent __unused, + struct perf_sample *sample, + struct perf_session *session __unused, + struct thread *thread) { PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; static char handler_name[256]; @@ -217,6 +218,10 @@ static void python_process_event(int cpu, void *data, unsigned n = 0; int type; int pid; + int cpu = sample->cpu; + void *data = sample->raw_data; + unsigned long long nsecs = sample->time; + char *comm = thread->comm; t = PyTuple_New(MAX_FIELDS); if (!t) diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index f7af2fca965d..66f4b78737ab 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -36,11 +36,10 @@ static int stop_script_unsupported(void) return 0; } -static void process_event_unsupported(int cpu __unused, - void *data __unused, - int size __unused, - unsigned long long nsecs __unused, - char *comm __unused) +static void process_event_unsupported(union perf_event *event __unused, + struct perf_sample *sample __unused, + struct perf_session *session __unused, + struct thread *thread __unused) { } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index b5f12ca24d99..5f7b5139c321 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -3,6 +3,7 @@ #include #include "parse-events.h" +#include "session.h" #define __unused __attribute__((unused)) @@ -278,8 +279,10 @@ struct scripting_ops { const char *name; int (*start_script) (const char *script, int argc, const char **argv); int (*stop_script) (void); - void (*process_event) (int cpu, void *data, int size, - unsigned long long nsecs, char *comm); + void (*process_event) (union perf_event *event, + struct perf_sample *sample, + struct perf_session *session, + struct thread *thread); int (*generate_script) (const char *outfile); }; From 2ee7a49f935b19f7daf0a110800488acd2479cba Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Mar 2011 22:23:24 
-0700 Subject: [PATCH 12/30] perf tracing: Remove print_graph_cpu and print_graph_proc from trace-event-parse Next patch moves printing of 'common' data into perf-script which removes the need for these functions. Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1299734608-5223-3-git-send-email-daahern@cisco.com> Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-parse.c | 65 +---------------------------- 1 file changed, 2 insertions(+), 63 deletions(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index d8e622dd738a..dd5f058292d8 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2648,63 +2648,8 @@ static void print_lat_fmt(void *data, int size __unused) printf("%d", lock_depth); } -/* taken from Linux, written by Frederic Weisbecker */ -static void print_graph_cpu(int cpu) -{ - int i; - int log10_this = log10_cpu(cpu); - int log10_all = log10_cpu(cpus); - - - /* - * Start with a space character - to make it stand out - * to the right a bit when trace output is pasted into - * email: - */ - printf(" "); - - /* - * Tricky - we space the CPU field according to the max - * number of online CPUs. On a 2-cpu system it would take - * a maximum of 1 digit - on a 128 cpu system it would - * take up to 3 digits: - */ - for (i = 0; i < log10_all - log10_this; i++) - printf(" "); - - printf("%d) ", cpu); -} - -#define TRACE_GRAPH_PROCINFO_LENGTH 14 #define TRACE_GRAPH_INDENT 2 -static void print_graph_proc(int pid, const char *comm) -{ - /* sign + log10(MAX_INT) + '\0' */ - char pid_str[11]; - int spaces = 0; - int len; - int i; - - sprintf(pid_str, "%d", pid); - - /* 1 stands for the "-" character */ - len = strlen(comm) + strlen(pid_str) + 1; - - if (len < TRACE_GRAPH_PROCINFO_LENGTH) - spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; - - /* First spaces to align center */ - for (i = 0; i < spaces / 2; i++) - printf(" "); - - printf("%s-%s", comm, pid_str); - - /* Last spaces to align center */ - for (i = 0; i < spaces - (spaces / 2); i++) - printf(" "); -} - static struct record * get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func, struct record *next) @@ -2876,7 +2821,7 @@ static void print_graph_nested(struct event *event, void *data) static void pretty_print_func_ent(void *data, int size, struct event *event, - int cpu, int pid, const char *comm, + int cpu, int pid, const char *comm __unused, unsigned long secs, unsigned long usecs) { struct format_field *field; @@ -2886,9 +2831,6 @@ pretty_print_func_ent(void *data, int size, struct event *event, printf("%5lu.%06lu | ", secs, usecs); - print_graph_cpu(cpu); - print_graph_proc(pid, comm); - printf(" | "); if (latency_format) { @@ -2924,7 +2866,7 @@ out_free: static void pretty_print_func_ret(void *data, int size __unused, struct event *event, - int cpu, int pid, const char *comm, + int cpu __unused, int pid __unused, const char *comm __unused, unsigned long secs, unsigned long usecs) { unsigned long long rettime, calltime; @@ -2934,9 +2876,6 @@ pretty_print_func_ret(void *data, int size __unused, struct event *event, printf("%5lu.%06lu | ", secs, usecs); - print_graph_cpu(cpu); - print_graph_proc(pid, comm); - printf(" | "); if (latency_format) { From c70c94b47405d2c94df19c16273daf1f5fb9193d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Mar 2011 22:23:25 -0700 Subject: [PATCH 13/30] perf script: 
Move printing of 'common' data from print_event and rename This change does impact output: latency data is trace specific and is now printed after the common data - comm, tid, cpu, time and event name. Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1299734608-5223-4-git-send-email-daahern@cisco.com> Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 38 +++++++++++++++++----- tools/perf/util/trace-event-parse.c | 49 ++++++----------------------- tools/perf/util/trace-event.h | 3 +- 3 files changed, 42 insertions(+), 48 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b2bdd5534026..0a79da21df23 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -20,18 +20,42 @@ static u64 last_timestamp; static u64 nr_unordered; extern const struct option record_options[]; +static void print_sample_start(struct perf_sample *sample, + struct thread *thread) +{ + int type; + struct event *event; + const char *evname = NULL; + unsigned long secs; + unsigned long usecs; + unsigned long long nsecs = sample->time; + + if (latency_format) + printf("%8.8s-%-5d %3d", thread->comm, sample->tid, sample->cpu); + else + printf("%16s-%-5d [%03d]", thread->comm, sample->tid, sample->cpu); + + secs = nsecs / NSECS_PER_SEC; + nsecs -= secs * NSECS_PER_SEC; + usecs = nsecs / NSECS_PER_USEC; + printf(" %5lu.%06lu: ", secs, usecs); + + type = trace_parse_common_type(sample->raw_data); + event = trace_find_event(type); + if (event) + evname = event->name; + + printf("%s: ", evname ? evname : "(unknown)"); +} + static void process_event(union perf_event *event __unused, struct perf_sample *sample, struct perf_session *session __unused, struct thread *thread) { - /* - * FIXME: better resolve from pid from the struct trace_entry - * field, although it should be the same than this perf - * event pid - */ - print_event(sample->cpu, sample->raw_data, sample->raw_size, - sample->time, thread->comm); + print_sample_start(sample, thread); + print_trace_event(sample->cpu, sample->raw_data, sample->raw_size); + printf("\n"); } static int default_start_script(const char *script __unused, diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index dd5f058292d8..0a7ed5b5e281 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2643,9 +2643,9 @@ static void print_lat_fmt(void *data, int size __unused) printf("."); if (lock_depth < 0) - printf("."); + printf(". 
"); else - printf("%d", lock_depth); + printf("%d ", lock_depth); } #define TRACE_GRAPH_INDENT 2 @@ -2821,18 +2821,13 @@ static void print_graph_nested(struct event *event, void *data) static void pretty_print_func_ent(void *data, int size, struct event *event, - int cpu, int pid, const char *comm __unused, - unsigned long secs, unsigned long usecs) + int cpu, int pid) { struct format_field *field; struct record *rec; void *copy_data; unsigned long val; - printf("%5lu.%06lu | ", secs, usecs); - - printf(" | "); - if (latency_format) { print_lat_fmt(data, size); printf(" | "); @@ -2865,19 +2860,13 @@ out_free: } static void -pretty_print_func_ret(void *data, int size __unused, struct event *event, - int cpu __unused, int pid __unused, const char *comm __unused, - unsigned long secs, unsigned long usecs) +pretty_print_func_ret(void *data, int size __unused, struct event *event) { unsigned long long rettime, calltime; unsigned long long duration, depth; struct format_field *field; int i; - printf("%5lu.%06lu | ", secs, usecs); - - printf(" | "); - if (latency_format) { print_lat_fmt(data, size); printf(" | "); @@ -2915,31 +2904,21 @@ pretty_print_func_ret(void *data, int size __unused, struct event *event, static void pretty_print_func_graph(void *data, int size, struct event *event, - int cpu, int pid, const char *comm, - unsigned long secs, unsigned long usecs) + int cpu, int pid) { if (event->flags & EVENT_FL_ISFUNCENT) - pretty_print_func_ent(data, size, event, - cpu, pid, comm, secs, usecs); + pretty_print_func_ent(data, size, event, cpu, pid); else if (event->flags & EVENT_FL_ISFUNCRET) - pretty_print_func_ret(data, size, event, - cpu, pid, comm, secs, usecs); + pretty_print_func_ret(data, size, event); printf("\n"); } -void print_event(int cpu, void *data, int size, unsigned long long nsecs, - char *comm) +void print_trace_event(int cpu, void *data, int size) { struct event *event; - unsigned long secs; - unsigned long usecs; int type; int pid; - secs = nsecs / NSECS_PER_SEC; - nsecs -= secs * NSECS_PER_SEC; - usecs = nsecs / NSECS_PER_USEC; - type = trace_parse_common_type(data); event = trace_find_event(type); @@ -2951,17 +2930,10 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, pid = trace_parse_common_pid(data); if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET)) - return pretty_print_func_graph(data, size, event, cpu, - pid, comm, secs, usecs); + return pretty_print_func_graph(data, size, event, cpu, pid); - if (latency_format) { - printf("%8.8s-%-5d %3d", - comm, pid, cpu); + if (latency_format) print_lat_fmt(data, size); - } else - printf("%16s-%-5d [%03d]", comm, pid, cpu); - - printf(" %5lu.%06lu: %s: ", secs, usecs, event->name); if (event->flags & EVENT_FL_FAILED) { printf("EVENT '%s' FAILED TO PARSE\n", @@ -2970,7 +2942,6 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, } pretty_print(data, size, event); - printf("\n"); } static void print_fields(struct print_flag_sym *field) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 5f7b5139c321..b04da5722437 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -177,8 +177,7 @@ void print_printk(void); int parse_ftrace_file(char *buf, unsigned long size); int parse_event_file(char *buf, unsigned long size, char *sys); -void print_event(int cpu, void *data, int size, unsigned long long nsecs, - char *comm); +void print_trace_event(int cpu, void *data, int size); extern int file_bigendian; extern int 
host_bigendian; From 745f43e3433a7939bd9c351c8106e0c1db2044c6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Mar 2011 22:23:26 -0700 Subject: [PATCH 14/30] perf script: Support custom field selection for output Allow a user to select which fields to print to stdout for event data. Options include comm (command name), tid (thread id), pid (process id), time (perf timestamp), cpu, event (for event name), and trace (for trace data). Default is set to maintain compatibility with current output; this feature does alter output format slightly -- no '-' between command and pid/tid. Thanks to Frederic Weisbecker for detailed suggestions on this approach. Examples (output compressed) 1. trace, default format perf record -ga -e sched:sched_switch perf script swapper 0 [000] 537.037184: sched_switch: prev_comm=swapper prev_pid=0... sshd 1675 [000] 537.037309: sched_switch: prev_comm=sshd prev_pid=1675... netstat 1692 [001] 537.038664: sched_switch: prev_comm=netstat prev_pid=1692... 2. trace, custom format perf record -ga -e sched:sched_switch perf script -f comm,pid,time,trace <--- omitting cpu and event name swapper 0 537.037184: prev_comm=swapper prev_pid=0 prev_prio=120 ... sshd 1675 537.037309: prev_comm=sshd prev_pid=1675 prev_prio=120 ... netstat 1692 537.038664: prev_comm=netstat prev_pid=1692 prev_prio=120 ... Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1299734608-5223-5-git-send-email-daahern@cisco.com> Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 5 + tools/perf/builtin-script.c | 141 ++++++++++++++++++++--- 2 files changed, 130 insertions(+), 16 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 29ad94293cd2..b73cf589c109 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -112,6 +112,11 @@ OPTIONS --debug-mode:: Do various checks like samples ordering and lost events. +-f:: +--fields + Comma separated list of fields to print. 
Options are: + comm, tid, pid, time, cpu, event, trace + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0a79da21df23..c046e6e8600a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -20,6 +20,38 @@ static u64 last_timestamp; static u64 nr_unordered; extern const struct option record_options[]; +enum perf_output_field { + PERF_OUTPUT_COMM = 1U << 0, + PERF_OUTPUT_TID = 1U << 1, + PERF_OUTPUT_PID = 1U << 2, + PERF_OUTPUT_TIME = 1U << 3, + PERF_OUTPUT_CPU = 1U << 4, + PERF_OUTPUT_EVNAME = 1U << 5, + PERF_OUTPUT_TRACE = 1U << 6, +}; + +struct output_option { + const char *str; + enum perf_output_field field; +} all_output_options[] = { + {.str = "comm", .field = PERF_OUTPUT_COMM}, + {.str = "tid", .field = PERF_OUTPUT_TID}, + {.str = "pid", .field = PERF_OUTPUT_PID}, + {.str = "time", .field = PERF_OUTPUT_TIME}, + {.str = "cpu", .field = PERF_OUTPUT_CPU}, + {.str = "event", .field = PERF_OUTPUT_EVNAME}, + {.str = "trace", .field = PERF_OUTPUT_TRACE}, +}; + +/* default set to maintain compatibility with current format */ +static u64 output_fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \ + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \ + PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE; + +static bool output_set_by_user; + +#define PRINT_FIELD(x) (output_fields & PERF_OUTPUT_##x) + static void print_sample_start(struct perf_sample *sample, struct thread *thread) { @@ -28,24 +60,45 @@ static void print_sample_start(struct perf_sample *sample, const char *evname = NULL; unsigned long secs; unsigned long usecs; - unsigned long long nsecs = sample->time; + unsigned long long nsecs; - if (latency_format) - printf("%8.8s-%-5d %3d", thread->comm, sample->tid, sample->cpu); - else - printf("%16s-%-5d [%03d]", thread->comm, sample->tid, sample->cpu); + if (PRINT_FIELD(COMM)) { + if (latency_format) + printf("%8.8s ", thread->comm); + else + printf("%16s ", thread->comm); + } - secs = nsecs / NSECS_PER_SEC; - nsecs -= secs * NSECS_PER_SEC; - usecs = nsecs / NSECS_PER_USEC; - printf(" %5lu.%06lu: ", secs, usecs); + if (PRINT_FIELD(PID) && PRINT_FIELD(TID)) + printf("%5d/%-5d ", sample->pid, sample->tid); + else if (PRINT_FIELD(PID)) + printf("%5d ", sample->pid); + else if (PRINT_FIELD(TID)) + printf("%5d ", sample->tid); - type = trace_parse_common_type(sample->raw_data); - event = trace_find_event(type); - if (event) - evname = event->name; + if (PRINT_FIELD(CPU)) { + if (latency_format) + printf("%3d ", sample->cpu); + else + printf("[%03d] ", sample->cpu); + } - printf("%s: ", evname ? evname : "(unknown)"); + if (PRINT_FIELD(TIME)) { + nsecs = sample->time; + secs = nsecs / NSECS_PER_SEC; + nsecs -= secs * NSECS_PER_SEC; + usecs = nsecs / NSECS_PER_USEC; + printf("%5lu.%06lu: ", secs, usecs); + } + + if (PRINT_FIELD(EVNAME)) { + type = trace_parse_common_type(sample->raw_data); + event = trace_find_event(type); + if (event) + evname = event->name; + + printf("%s: ", evname ? 
evname : "(unknown)"); + } } static void process_event(union perf_event *event __unused, @@ -54,7 +107,11 @@ static void process_event(union perf_event *event __unused, struct thread *thread) { print_sample_start(sample, thread); - print_trace_event(sample->cpu, sample->raw_data, sample->raw_size); + + if (PRINT_FIELD(TRACE)) + print_trace_event(sample->cpu, sample->raw_data, + sample->raw_size); + printf("\n"); } @@ -311,6 +368,48 @@ static int parse_scriptname(const struct option *opt __used, return 0; } +static int parse_output_fields(const struct option *opt __used, + const char *arg, int unset __used) +{ + char *tok; + int i, imax = sizeof(all_output_options) / sizeof(struct output_option); + int rc = 0; + char *str = strdup(arg); + + if (!str) + return -ENOMEM; + + tok = strtok(str, ","); + if (!tok) { + fprintf(stderr, "Invalid field string."); + return -EINVAL; + } + + output_fields = 0; + while (1) { + for (i = 0; i < imax; ++i) { + if (strcmp(tok, all_output_options[i].str) == 0) { + output_fields |= all_output_options[i].field; + break; + } + } + if (i == imax) { + fprintf(stderr, "Invalid field requested."); + rc = -EINVAL; + break; + } + + tok = strtok(NULL, ","); + if (!tok) + break; + } + + output_set_by_user = true; + + free(str); + return rc; +} + /* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ static int is_directory(const char *base_path, const struct dirent *dent) { @@ -623,6 +722,9 @@ static const struct option options[] = { "input file name"), OPT_BOOLEAN('d', "debug-mode", &debug_mode, "do various checks like samples ordering and lost events"), + OPT_CALLBACK('f', "fields", NULL, "str", + "comma separated output fields. Options: comm,tid,pid,time,cpu,event,trace", + parse_output_fields), OPT_END() }; @@ -809,8 +911,15 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (generate_script_lang) { struct stat perf_stat; + int input; - int input = open(input_name, O_RDONLY); + if (output_set_by_user) { + fprintf(stderr, + "custom fields not supported for generated scripts"); + return -1; + } + + input = open(input_name, O_RDONLY); if (input < 0) { perror("failed to open file"); exit(-1); From c0230b2bfbd16e42d937c34aed99e5d6493eb5e4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Mar 2011 22:23:27 -0700 Subject: [PATCH 15/30] perf script: Add support for dumping symbols Add option to dump symbols found in events. e.g., perf script -f comm,pid,tid,time,trace,sym swapper 0/0 537.037184: prev_comm=swapper prev_pid=0 prev_prio=120... ffffffff81030350 perf_trace_sched_switch ([kernel.kallsyms]) ffffffff81382ac5 schedule ([kernel.kallsyms]) ffffffff8100134a cpu_idle ([kernel.kallsyms]) ffffffff81370b39 rest_init ([kernel.kallsyms]) ffffffff81696c23 start_kernel ([kernel.kallsyms].init.text) ffffffff816962af x86_64_start_reservations ([kernel.kallsyms].init.text) ffffffff816963b9 x86_64_start_kernel ([kernel.kallsyms].init.text) sshd 1675/1675 537.037309: prev_comm=sshd prev_pid=1675 prev_prio=120... 
ffffffff81030350 perf_trace_sched_switch ([kernel.kallsyms]) ffffffff81382ac5 schedule ([kernel.kallsyms]) ffffffff813837aa schedule_hrtimeout_range_clock ([kernel.kallsyms]) ffffffff81383886 schedule_hrtimeout_range ([kernel.kallsyms]) ffffffff8110c4f9 poll_schedule_timeout ([kernel.kallsyms]) ffffffff8110cd20 do_select ([kernel.kallsyms]) ffffffff8110ced8 core_sys_select ([kernel.kallsyms]) ffffffff8110d00d sys_select ([kernel.kallsyms]) ffffffff81002bc2 system_call ([kernel.kallsyms]) 7f1647e56e93 __GI_select (/lib64/libc-2.12.90.so) netstat 1692/1692 537.038664: prev_comm=netstat prev_pid=1692 prev_prio=... ffffffff81030350 perf_trace_sched_switch ([kernel.kallsyms]) ffffffff81382ac5 schedule ([kernel.kallsyms]) ffffffff81002c3a sysret_careful ([kernel.kallsyms]) 7f7a6cd1b210 __GI___libc_read (/lib64/libc-2.12.90.so) Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1299734608-5223-6-git-send-email-daahern@cisco.com> Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 16 ++++++- tools/perf/builtin-script.c | 31 +++++++++++- tools/perf/util/session.c | 61 ++++++++++++++++++++++++ tools/perf/util/session.h | 4 ++ 4 files changed, 110 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index b73cf589c109..c64118a4de49 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -115,7 +115,21 @@ OPTIONS -f:: --fields Comma separated list of fields to print. Options are: - comm, tid, pid, time, cpu, event, trace + comm, tid, pid, time, cpu, event, trace, sym + +-k:: +--vmlinux=:: + vmlinux pathname + +--kallsyms=:: + kallsyms pathname + +--symfs=:: + Look for files with symbols relative to this directory. + +-G:: +--hide-call-graph:: + When printing symbols do not display call chain. 
SEE ALSO -------- diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c046e6e8600a..b45b09c13f7a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -19,6 +19,7 @@ static bool debug_mode; static u64 last_timestamp; static u64 nr_unordered; extern const struct option record_options[]; +static bool no_callchain; enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, @@ -28,6 +29,7 @@ enum perf_output_field { PERF_OUTPUT_CPU = 1U << 4, PERF_OUTPUT_EVNAME = 1U << 5, PERF_OUTPUT_TRACE = 1U << 6, + PERF_OUTPUT_SYM = 1U << 7, }; struct output_option { @@ -41,6 +43,7 @@ struct output_option { {.str = "cpu", .field = PERF_OUTPUT_CPU}, {.str = "event", .field = PERF_OUTPUT_EVNAME}, {.str = "trace", .field = PERF_OUTPUT_TRACE}, + {.str = "sym", .field = PERF_OUTPUT_SYM}, }; /* default set to maintain compatibility with current format */ @@ -65,6 +68,8 @@ static void print_sample_start(struct perf_sample *sample, if (PRINT_FIELD(COMM)) { if (latency_format) printf("%8.8s ", thread->comm); + else if (PRINT_FIELD(SYM) && symbol_conf.use_callchain) + printf("%s ", thread->comm); else printf("%16s ", thread->comm); } @@ -112,6 +117,14 @@ static void process_event(union perf_event *event __unused, print_trace_event(sample->cpu, sample->raw_data, sample->raw_size); + if (PRINT_FIELD(SYM)) { + if (!symbol_conf.use_callchain) + printf(" "); + else + printf("\n"); + perf_session__print_symbols(event, sample, session); + } + printf("\n"); } @@ -190,7 +203,10 @@ static int process_sample_event(union perf_event *event, static struct perf_event_ops event_ops = { .sample = process_sample_event, + .mmap = perf_event__process_mmap, .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, .attr = perf_event__process_attr, .event_type = perf_event__process_event_type, .tracing_data = perf_event__process_tracing_data, @@ -722,8 +738,16 @@ static const struct option options[] = { "input file name"), OPT_BOOLEAN('d', "debug-mode", &debug_mode, "do various checks like samples ordering and lost events"), + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), + OPT_BOOLEAN('G', "hide-call-graph", &no_callchain, + "When printing symbols do not display call chain"), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), OPT_CALLBACK('f', "fields", NULL, "str", - "comma separated output fields. Options: comm,tid,pid,time,cpu,event,trace", + "comma separated output fields. 
Options: comm,tid,pid,time,cpu,event,trace,sym", parse_output_fields), OPT_END() @@ -905,6 +929,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (session == NULL) return -ENOMEM; + if (!no_callchain && (session->sample_type & PERF_SAMPLE_CALLCHAIN)) + symbol_conf.use_callchain = true; + else + symbol_conf.use_callchain = false; + if (strcmp(input_name, "-") && !perf_session__has_traces(session, "record -R")) return -EINVAL; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f26639fa0fb3..c68cf40764f9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1134,3 +1134,64 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) return ret; } + +void perf_session__print_symbols(union perf_event *event, + struct perf_sample *sample, + struct perf_session *session) +{ + struct addr_location al; + const char *symname, *dsoname; + struct callchain_cursor *cursor = &session->callchain_cursor; + struct callchain_cursor_node *node; + + if (perf_event__preprocess_sample(event, session, &al, sample, + NULL) < 0) { + error("problem processing %d event, skipping it.\n", + event->header.type); + return; + } + + if (symbol_conf.use_callchain && sample->callchain) { + + if (perf_session__resolve_callchain(session, al.thread, + sample->callchain, NULL) != 0) { + if (verbose) + error("Failed to resolve callchain. Skipping\n"); + return; + } + callchain_cursor_commit(cursor); + + while (1) { + node = callchain_cursor_current(cursor); + if (!node) + break; + + if (node->sym && node->sym->name) + symname = node->sym->name; + else + symname = ""; + + if (node->map && node->map->dso && node->map->dso->name) + dsoname = node->map->dso->name; + else + dsoname = ""; + + printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname); + + callchain_cursor_advance(cursor); + } + + } else { + if (al.sym && al.sym->name) + symname = al.sym->name; + else + symname = ""; + + if (al.map && al.map->dso && al.map->dso->name) + dsoname = al.map->dso->name; + else + dsoname = ""; + + printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname); + } +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b5b148b0aaca..0b3c9afecaa9 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -159,4 +159,8 @@ static inline int perf_session__parse_sample(struct perf_session *session, session->sample_id_all, sample); } +void perf_session__print_symbols(union perf_event *event, + struct perf_sample *sample, + struct perf_session *session); + #endif /* __PERF_SESSION_H */ From 1424dc96807909438282663079adc7f27c10b4a5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Mar 2011 22:23:28 -0700 Subject: [PATCH 16/30] perf script: Add support for H/W and S/W events Custom fields set for each type by prepending field argument with type. For file with multiple event types (e.g., trace and S/W) display of an event type suppressed by setting output fields to "". 
e.g., perf record -ga -e sched:sched_switch -e cpu-clock -c 10000000 -R -- sleep 1 perf script openssl 11496 [000] 9711.807107: cpu-clock-msecs: ffffffff810c22dc arch_local_irq_restore ([kernel.kallsyms]) ffffffff810c518c __alloc_pages_nodemask ([kernel.kallsyms]) ffffffff810297b2 pte_alloc_one ([kernel.kallsyms]) ffffffff810d8b98 __pte_alloc ([kernel.kallsyms]) ffffffff810daf07 handle_mm_fault ([kernel.kallsyms]) ffffffff8138763a do_page_fault ([kernel.kallsyms]) ffffffff81384a65 page_fault ([kernel.kallsyms]) 7f6130507d70 asn1_check_tlen (/lib64/libcrypto.so.1.0.0c) 0 () openssl 11496 [000] 9711.808042: sched_switch: prev_comm=openssl ... kworker/0:0 4 [000] 9711.808067: sched_switch: prev_comm=kworker/... swapper 0 [001] 9711.808090: sched_switch: prev_comm=kworker/... sshd 11451 [001] 9711.808185: sched_switch: prev_comm=sshd pre... swapper 0 [001] 9711.816155: cpu-clock-msecs: ffffffff81023609 native_safe_halt ([kernel.kallsyms]) ffffffff8100132a cpu_idle ([kernel.kallsyms]) ffffffff8137cf9b start_secondary ([kernel.kallsyms]) openssl 11496 [000] 9711.817104: cpu-clock-msecs: 7f61304ad723 AES_cbc_encrypt (/lib64/libcrypto.so.1.0.0c) 7fff3402f950 () 12f0debc9a785634 () swapper 0 [001] 9711.826155: cpu-clock-msecs: ffffffff81023609 native_safe_halt ([kernel.kallsyms]) ffffffff8100132a cpu_idle ([kernel.kallsyms]) ffffffff8137cf9b start_secondary ([kernel.kallsyms]) To suppress trace events within the file and use default output for S/W events: perf script -f trace: or to suppress S/W events and do default display for trace events: perf script -f sw: Custom field selections: perf script -f sw:comm,tid,time -f trace:time,trace openssl 11496 9711.797162: swapper 0 9711.807071: openssl 11496 9711.807107: 9711.808042: prev_comm=openssl prev_pid=11496 prev_prio=120 prev_state=R ... 9711.808067: prev_comm=kworker/0:0 prev_pid=4 prev_prio=120 prev_state=S ... 9711.808090: prev_comm=kworker/0:0 prev_pid=0 prev_prio=120 prev_state=R ... 9711.808185: prev_comm=sshd prev_pid=11451 prev_prio=120 prev_state=S ==>... swapper 0 9711.816155: openssl 11496 9711.817104: swapper 0 9711.826155: Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1299734608-5223-7-git-send-email-daahern@cisco.com> Signed-off-by: David Ahern Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 5 +- tools/perf/builtin-script.c | 155 ++++++++++++++++++----- tools/perf/util/parse-events.c | 22 ++++ tools/perf/util/parse-events.h | 1 + 4 files changed, 147 insertions(+), 36 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index c64118a4de49..66f040b30729 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -115,7 +115,10 @@ OPTIONS -f:: --fields Comma separated list of fields to print. Options are: - comm, tid, pid, time, cpu, event, trace, sym + comm, tid, pid, time, cpu, event, trace, sym. Field + list must be prepended with the type, trace, sw or hw, + to indicate to which event type the field list applies. 
+ e.g., -f sw:comm,tid,time,sym and -f trace:time,cpu,trace -k:: --vmlinux=:: diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b45b09c13f7a..9f5fc5492141 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -12,6 +12,8 @@ #include "util/trace-event.h" #include "util/parse-options.h" #include "util/util.h" +#include "util/evlist.h" +#include "util/evsel.h" static char const *script_name; static char const *generate_script_lang; @@ -47,16 +49,65 @@ struct output_option { }; /* default set to maintain compatibility with current format */ -static u64 output_fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \ - PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \ - PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE; +static u64 output_fields[PERF_TYPE_MAX] = { + [PERF_TYPE_HARDWARE] = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \ + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \ + PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, + + [PERF_TYPE_SOFTWARE] = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \ + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \ + PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, + + [PERF_TYPE_TRACEPOINT] = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \ + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \ + PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE, +}; static bool output_set_by_user; -#define PRINT_FIELD(x) (output_fields & PERF_OUTPUT_##x) +#define PRINT_FIELD(x) (output_fields[attr->type] & PERF_OUTPUT_##x) + +static int perf_session__check_attr(struct perf_session *session, + struct perf_event_attr *attr) +{ + if (PRINT_FIELD(TRACE) && + !perf_session__has_traces(session, "record -R")) + return -EINVAL; + + if (PRINT_FIELD(SYM)) { + if (!(session->sample_type & PERF_SAMPLE_IP)) { + pr_err("Samples do not contain IP data.\n"); + return -EINVAL; + } + if (!no_callchain && + !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) + symbol_conf.use_callchain = false; + } + + if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && + !(session->sample_type & PERF_SAMPLE_TID)) { + pr_err("Samples do not contain TID/PID data.\n"); + return -EINVAL; + } + + if (PRINT_FIELD(TIME) && + !(session->sample_type & PERF_SAMPLE_TIME)) { + pr_err("Samples do not contain timestamps.\n"); + return -EINVAL; + } + + if (PRINT_FIELD(CPU) && + !(session->sample_type & PERF_SAMPLE_CPU)) { + pr_err("Samples do not contain cpu.\n"); + return -EINVAL; + } + + return 0; +} static void print_sample_start(struct perf_sample *sample, - struct thread *thread) + struct thread *thread, + struct perf_event_attr *attr) { int type; struct event *event; @@ -97,10 +148,13 @@ static void print_sample_start(struct perf_sample *sample, } if (PRINT_FIELD(EVNAME)) { - type = trace_parse_common_type(sample->raw_data); - event = trace_find_event(type); - if (event) - evname = event->name; + if (attr->type == PERF_TYPE_TRACEPOINT) { + type = trace_parse_common_type(sample->raw_data); + event = trace_find_event(type); + if (event) + evname = event->name; + } else + evname = __event_name(attr->type, attr->config); printf("%s: ", evname ? evname : "(unknown)"); } @@ -108,10 +162,27 @@ static void print_sample_start(struct perf_sample *sample, static void process_event(union perf_event *event __unused, struct perf_sample *sample, - struct perf_session *session __unused, + struct perf_session *session, struct thread *thread) { - print_sample_start(sample, thread); + struct perf_event_attr *attr; + struct perf_evsel *evsel; + + evsel = perf_evlist__id2evsel(session->evlist, sample->id); + if (evsel == NULL) { + pr_err("Invalid data. 
Contains samples with id not in " + "its header!\n"); + return; + } + attr = &evsel->attr; + + if (output_fields[attr->type] == 0) + return; + + if (perf_session__check_attr(session, attr) < 0) + return; + + print_sample_start(sample, thread, attr); if (PRINT_FIELD(TRACE)) print_trace_event(sample->cpu, sample->raw_data, @@ -183,19 +254,17 @@ static int process_sample_event(union perf_event *event, return -1; } - if (session->sample_type & PERF_SAMPLE_RAW) { - if (debug_mode) { - if (sample->time < last_timestamp) { - pr_err("Samples misordered, previous: %" PRIu64 - " this: %" PRIu64 "\n", last_timestamp, - sample->time); - nr_unordered++; - } - last_timestamp = sample->time; - return 0; + if (debug_mode) { + if (sample->time < last_timestamp) { + pr_err("Samples misordered, previous: %" PRIu64 + " this: %" PRIu64 "\n", last_timestamp, + sample->time); + nr_unordered++; } - scripting_ops->process_event(event, sample, session, thread); + last_timestamp = sample->time; + return 0; } + scripting_ops->process_event(event, sample, session, thread); session->hists.stats.total_period += sample->period; return 0; @@ -391,21 +460,40 @@ static int parse_output_fields(const struct option *opt __used, int i, imax = sizeof(all_output_options) / sizeof(struct output_option); int rc = 0; char *str = strdup(arg); + int type = -1; if (!str) return -ENOMEM; - tok = strtok(str, ","); + tok = strtok(str, ":"); if (!tok) { - fprintf(stderr, "Invalid field string."); + fprintf(stderr, + "Invalid field string - not prepended with type."); return -EINVAL; } - output_fields = 0; + /* first word should state which event type user + * is specifying the fields + */ + if (!strcmp(tok, "hw")) + type = PERF_TYPE_HARDWARE; + else if (!strcmp(tok, "sw")) + type = PERF_TYPE_SOFTWARE; + else if (!strcmp(tok, "trace")) + type = PERF_TYPE_TRACEPOINT; + else { + fprintf(stderr, "Invalid event type in field string."); + return -EINVAL; + } + + output_fields[type] = 0; while (1) { + tok = strtok(NULL, ","); + if (!tok) + break; for (i = 0; i < imax; ++i) { if (strcmp(tok, all_output_options[i].str) == 0) { - output_fields |= all_output_options[i].field; + output_fields[type] |= all_output_options[i].field; break; } } @@ -414,10 +502,11 @@ static int parse_output_fields(const struct option *opt __used, rc = -EINVAL; break; } + } - tok = strtok(NULL, ","); - if (!tok) - break; + if (output_fields[type] == 0) { + pr_debug("No fields requested for %s type. " + "Events will not be displayed\n", event_type(type)); } output_set_by_user = true; @@ -747,7 +836,7 @@ static const struct option options[] = { OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), OPT_CALLBACK('f', "fields", NULL, "str", - "comma separated output fields. Options: comm,tid,pid,time,cpu,event,trace,sym", + "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace. 
Fields: comm,tid,pid,time,cpu,event,trace,sym", parse_output_fields), OPT_END() @@ -929,15 +1018,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (session == NULL) return -ENOMEM; - if (!no_callchain && (session->sample_type & PERF_SAMPLE_CALLCHAIN)) + if (!no_callchain) symbol_conf.use_callchain = true; else symbol_conf.use_callchain = false; - if (strcmp(input_name, "-") && - !perf_session__has_traces(session, "record -R")) - return -EINVAL; - if (generate_script_lang) { struct stat perf_stat; int input; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 54a7e2634d58..952b4ae3d954 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -263,6 +263,28 @@ static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) return name; } +const char *event_type(int type) +{ + switch (type) { + case PERF_TYPE_HARDWARE: + return "hardware"; + + case PERF_TYPE_SOFTWARE: + return "software"; + + case PERF_TYPE_TRACEPOINT: + return "tracepoint"; + + case PERF_TYPE_HW_CACHE: + return "hardware-cache"; + + default: + break; + } + + return "unknown"; +} + const char *event_name(struct perf_evsel *evsel) { u64 config = evsel->attr.config; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 212f88e07a9c..746d3fcbfc2a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -20,6 +20,7 @@ struct tracepoint_path { extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern bool have_tracepoints(struct list_head *evlist); +const char *event_type(int type); const char *event_name(struct perf_evsel *event); extern const char *__event_name(int type, u64 config); From 43adec955edd116c3e98c6e2f85fbd63281f5221 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Mar 2011 11:04:13 -0300 Subject: [PATCH 17/30] perf evlist: New command to list the names of events present in a perf.data file [root@emilia ~]# perf record -a -e sched:* -e timer:timer* sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.172 MB perf.data (~7530 samples) ] [root@emilia ~]# perf evlist sched:sched_kthread_stop sched:sched_kthread_stop_ret sched:sched_wakeup sched:sched_wakeup_new sched:sched_switch sched:sched_migrate_task sched:sched_process_free sched:sched_process_exit sched:sched_wait_task sched:sched_process_wait sched:sched_process_fork sched:sched_stat_wait sched:sched_stat_sleep sched:sched_stat_iowait sched:sched_stat_runtime sched:sched_pi_setprio timer:timer_init timer:timer_start timer:timer_expire_entry timer:timer_expire_exit timer:timer_cancel [root@emilia ~]# Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-evlist.txt | 26 ++++++++++++ tools/perf/Makefile | 1 + tools/perf/builtin-evlist.c | 54 ++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 6 files changed, 84 insertions(+) create mode 100644 tools/perf/Documentation/perf-evlist.txt create mode 100644 tools/perf/builtin-evlist.c diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt new file mode 100644 index 000000000000..0cada9e053dc --- /dev/null +++ b/tools/perf/Documentation/perf-evlist.txt @@ -0,0 +1,26 @@ +perf-evlist(1) +============== + +NAME +---- +perf-evlist - 
List the event names in a perf.data file + +SYNOPSIS +-------- +[verse] +'perf evlist ' + +DESCRIPTION +----------- +This command displays the names of events sampled in a perf.data file. + +OPTIONS +------- +-i:: +--input=:: + Input file name. (default: perf.data) + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-list[1], +linkperf:perf-report[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9b8421805c5c..158c30e8210c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -338,6 +338,7 @@ endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o +BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o BUILTIN_OBJS += $(OUTPUT)builtin-help.o BUILTIN_OBJS += $(OUTPUT)builtin-sched.o BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c new file mode 100644 index 000000000000..4c5e9e04a41f --- /dev/null +++ b/tools/perf/builtin-evlist.c @@ -0,0 +1,54 @@ +/* + * Builtin evlist command: Show the list of event selectors present + * in a perf.data file. + */ +#include "builtin.h" + +#include "util/util.h" + +#include + +#include "perf.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/parse-events.h" +#include "util/parse-options.h" +#include "util/session.h" + +static char const *input_name = "perf.data"; + +static int __cmd_evlist(void) +{ + struct perf_session *session; + struct perf_evsel *pos; + + session = perf_session__new(input_name, O_RDONLY, 0, false, NULL); + if (session == NULL) + return -ENOMEM; + + list_for_each_entry(pos, &session->evlist->entries, node) + printf("%s\n", event_name(pos)); + + perf_session__delete(session); + return 0; +} + +static const char * const evlist_usage[] = { + "perf evlist []", + NULL +}; + +static const struct option options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_END() +}; + +int cmd_evlist(int argc, const char **argv, const char *prefix __used) +{ + argc = parse_options(argc, argv, options, evlist_usage, 0); + if (argc) + usage_with_options(evlist_usage, options); + + return __cmd_evlist(); +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index c7798c7f24ed..4702e2443a8e 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -19,6 +19,7 @@ extern int cmd_bench(int argc, const char **argv, const char *prefix); extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix); extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); extern int cmd_diff(int argc, const char **argv, const char *prefix); +extern int cmd_evlist(int argc, const char **argv, const char *prefix); extern int cmd_help(int argc, const char **argv, const char *prefix); extern int cmd_sched(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 16b5088cf8f4..d695fe40fbff 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -8,6 +8,7 @@ perf-bench mainporcelain common perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common perf-diff mainporcelain common +perf-evlist mainporcelain common perf-inject mainporcelain common perf-list mainporcelain common perf-sched mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 595d0f4a7103..ec635b7cc8ea 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -313,6 +313,7 @@ static void 
handle_internal_command(int argc, const char **argv) { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, { "diff", cmd_diff, 0 }, + { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, From 58d406ed6a5f1ca4bc1dba5390b718c67847fa5f Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Tue, 15 Mar 2011 19:16:40 -0700 Subject: [PATCH 18/30] perf tools: Version incorrect with some versions of grep Some versions of grep don't treat '\s' properly. When building perf on such systems and using a kernel tarball the perf version is unable to be determined from the main kernel Makefile and the user is left with a version of '..'. Replacing the use of '\s' with '[[:space:]]', which should work in all grep versions, gives a usable version number. Reported-by: Tapan Dhimant Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Tapan Dhimant Cc: linux-kernel@vger.kernel.org Cc: stable@kernel.org LKML-Reference: <1300241800-30281-1-git-send-email-johunt@akamai.com> Signed-off-by: Josh Hunt Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/PERF-VERSION-GEN | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 97d76562a1a0..26d4d3fd6deb 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -23,10 +23,10 @@ if test -d ../../.git -o -f ../../.git && then VN=$(echo "$VN" | sed -e 's/-/./g'); else - eval `grep '^VERSION\s*=' ../../Makefile|tr -d ' '` - eval `grep '^PATCHLEVEL\s*=' ../../Makefile|tr -d ' '` - eval `grep '^SUBLEVEL\s*=' ../../Makefile|tr -d ' '` - eval `grep '^EXTRAVERSION\s*=' ../../Makefile|tr -d ' '` + eval $(grep '^VERSION[[:space:]]*=' ../../Makefile|tr -d ' ') + eval $(grep '^PATCHLEVEL[[:space:]]*=' ../../Makefile|tr -d ' ') + eval $(grep '^SUBLEVEL[[:space:]]*=' ../../Makefile|tr -d ' ') + eval $(grep '^EXTRAVERSION[[:space:]]*=' ../../Makefile|tr -d ' ') VN="${VERSION}.${PATCHLEVEL}.${SUBLEVEL}${EXTRAVERSION}" fi From 9df03abeda3d928ecdedc0f427336931eac0a477 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 22 Feb 2011 18:47:15 +0100 Subject: [PATCH 19/30] perf lock: Fix sorting by wait_min If lock was uncontended, wait_time_min == ULLONG_MAX, so we need to handle this case differently to show high wait times first Acked-by: Hitoshi Mitake Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20110222174715.GC9687@joi.lan> Signed-off-by: Marcin Slusarz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2e93f99b1480..7a2a79d2cf2c 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -202,9 +202,20 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid) SINGLE_KEY(nr_acquired) SINGLE_KEY(nr_contended) SINGLE_KEY(wait_time_total) -SINGLE_KEY(wait_time_min) SINGLE_KEY(wait_time_max) +static int lock_stat_key_wait_time_min(struct lock_stat *one, + struct lock_stat *two) +{ + u64 s1 = one->wait_time_min; + u64 s2 = two->wait_time_min; + if (s1 == ULLONG_MAX) + s1 = 0; + if (s2 == ULLONG_MAX) + s2 = 0; + return s1 > s2; +} + struct lock_key { /* * name: the value for specify by user From eefaaac46470d105f58cff93c8176cfadc75b857 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 9 Mar 2011 23:21:28 +0800 Subject: [PATCH 20/30] perf, x86: Clean up 
SandyBridge PEBS events Use INTEL_EVENT_CONSTRAINT() for the events where all umasks support PEBS. Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra LKML-Reference: <1299684089-22835-2-git-send-email-ming.m.lin@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 27 +++++------------------ 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b95c66ae4a2a..b72687f31f5c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -412,20 +412,9 @@ static struct event_constraint intel_snb_pebs_events[] = { PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */ - PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */ - PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */ - PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */ - PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */ - PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */ - PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */ - PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ - PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ - PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ - PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */ - PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */ - PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ - PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ @@ -434,14 +423,8 @@ static struct event_constraint intel_snb_pebs_events[] = { PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ - PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ - PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ - PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */ - PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ - PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ - PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ - PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */ - PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; From 7d5d02dadd43db7f829775e404e82019c5d5586f Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 9 Mar 
2011 23:21:29 +0800 Subject: [PATCH 21/30] perf, x86: Use INTEL_*_CONSTRAINT() for all PEBS event constraints PEBS_EVENT_CONSTRAINT() is just a duplicate of INTEL_UEVENT_CONSTRAINT(). Remove it and use INTEL_UEVENT_CONSTRAINT() instead. Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra LKML-Reference: <1299684089-22835-3-git-send-email-ming.m.lin@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 - arch/x86/kernel/cpu/perf_event_intel_ds.c | 95 +++++++++++------------ 2 files changed, 47 insertions(+), 50 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 26604188aa49..e8dbe179587f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -178,8 +178,6 @@ struct cpu_hw_events { */ #define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) -#define PEBS_EVENT_CONSTRAINT(c, n) \ - INTEL_UEVENT_CONSTRAINT(c, n) #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b72687f31f5c..bab491b8ee25 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -362,70 +362,69 @@ static int intel_pmu_drain_bts_buffer(void) * PEBS */ static struct event_constraint intel_core2_pebs_event_constraints[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ - PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ - INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ + INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ + INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_atom_pebs_event_constraints[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ - INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_nehalem_pebs_event_constraints[] = { - INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ - PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ - INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ - INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ - INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ - PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ - INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 
0xf), /* INST_RETIRED.ANY */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_westmere_pebs_event_constraints[] = { - INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ - PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ - INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ - - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ - PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ - INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_snb_pebs_events[] = { - PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ - PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ - PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ - PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ - PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ - PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ - PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ - PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ - PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 
0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ + INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ + INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ + INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; From 0837e3242c73566fc1c0196b4ec61779c25ffc93 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 9 Mar 2011 14:38:42 +1100 Subject: [PATCH 22/30] perf, powerpc: Handle events that raise an exception without overflowing Events on POWER7 can roll back if a speculative event doesn't eventually complete. Unfortunately in some rare cases they will raise a performance monitor exception. We need to catch this to ensure we reset the PMC. In all cases the PMC will be 256 or less cycles from overflow. Signed-off-by: Anton Blanchard Signed-off-by: Peter Zijlstra Cc: # as far back as it applies cleanly LKML-Reference: <20110309143842.6c22845e@kryten> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/perf_event.c | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 125fc1ad665d..7626fa78e1f8 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -880,6 +880,7 @@ #define PV_970 0x0039 #define PV_POWER5 0x003A #define PV_POWER5p 0x003B +#define PV_POWER7 0x003F #define PV_970FX 0x003C #define PV_630 0x0040 #define PV_630p 0x0041 diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index ab6f6beadb57..97e0ae414940 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1269,6 +1269,28 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) return ip; } +static bool pmc_overflow(unsigned long val) +{ + if ((int)val < 0) + return true; + + /* + * Events on POWER7 can roll back if a speculative event doesn't + * eventually complete. Unfortunately in some rare cases they will + * raise a performance monitor exception. We need to catch this to + * ensure we reset the PMC. In all cases the PMC will be 256 or less + * cycles from overflow. + * + * We only do this if the first pass fails to find any overflowing + * PMCs because a user might set a period of less than 256 and we + * don't want to mistakenly reset them. 
+ */ + if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + return true; + + return false; +} + /* * Performance monitor interrupt stuff */ @@ -1316,7 +1338,7 @@ static void perf_event_interrupt(struct pt_regs *regs) if (is_limited_pmc(i + 1)) continue; val = read_pmc(i + 1); - if ((int)val < 0) + if (pmc_overflow(val)) write_pmc(i + 1, 0); } } From 91b2f482e62ad0d444222253026a5cbca28c4ab9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Mar 2011 21:27:08 +0100 Subject: [PATCH 23/30] perf: Fix the software events state check Fix the mistakenly inverted check of events state. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian Cc: LKML-Reference: <1299529629-18280-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index ed253aa24ba4..974e2e64a43a 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5122,7 +5122,7 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { if (event->hw.state & PERF_HES_STOPPED) - return 0; + return 1; if (regs) { if (event->attr.exclude_user && user_mode(regs)) From a0f7d0f7fc02465bb9758501f611f63381792996 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Mar 2011 21:27:09 +0100 Subject: [PATCH 24/30] perf: Handle stopped state with tracepoints We toggle the state from start and stop callbacks but actually don't check it when the event triggers. Do it so that these callbacks actually work. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian Cc: Signed-off-by: Peter Zijlstra LKML-Reference: <1299529629-18280-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 974e2e64a43a..533f71570736 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5478,6 +5478,8 @@ static int perf_tp_event_match(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; /* * All tracepoints are from kernel-space. */ From ee643c4179c3a18b018de3a4c07a7bb3a75c8e4e Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 7 Mar 2011 15:46:59 +0000 Subject: [PATCH 25/30] perf: Reorder & optimize perf_event_context to remove alignment padding on 64 bit builds Remove 8 bytes of alignment padding from perf_event_context on 64 bit builds which shrinks its size to 192 bytes allowing it to fit into one fewer cache lines and into a smaller slab. 
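The 8 bytes reclaimed by this reordering come purely from member layout rules on LP64 targets. A minimal, self-contained sketch of the effect (illustrative field names only, not the real perf_event_context):

#include <stdio.h>

/* 4-byte field first: the 8-byte pointer forces 4 bytes of padding after
 * 'type', and the tail is padded to 8-byte alignment -> 24 bytes. */
struct ctx_padded {
	int type;
	void *pmu;
	int nr_active;
};

/* Same members, pointer first, small fields grouped together -> 16 bytes. */
struct ctx_reordered {
	void *pmu;
	int type;
	int nr_active;
};

int main(void)
{
	printf("padded: %zu bytes, reordered: %zu bytes\n",
	       sizeof(struct ctx_padded), sizeof(struct ctx_reordered));
	return 0;
}

Running pahole on the compiled object reports such holes directly, which is a common way these reorderings are found.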
Signed-off-by: Richard Kennedy Signed-off-by: Peter Zijlstra LKML-Reference: <1299512819.2039.5.camel@castor.rsk> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 614615b8d42b..f495c0147240 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -878,8 +878,8 @@ enum perf_event_context_type { * Used as a container for task events and CPU events as well: */ struct perf_event_context { - enum perf_event_context_type type; struct pmu *pmu; + enum perf_event_context_type type; /* * Protect the states of the events in the list, * nr_active, and the list: From 38b435b16c36b0d863efcf3f07b34a6fac9873fd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Mar 2011 14:37:10 +0100 Subject: [PATCH 26/30] perf: Fix tear-down of inherited group events When destroying inherited events, we need to destroy groups too, otherwise the event iteration in perf_event_exit_task_context() will miss group siblings and we leak events with all the consequences. Reported-and-tested-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: # .35+ LKML-Reference: <1300196470.2203.61.camel@twins> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 533f71570736..3472bb1a070c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -6722,17 +6722,20 @@ __perf_event_exit_task(struct perf_event *child_event, struct perf_event_context *child_ctx, struct task_struct *child) { - struct perf_event *parent_event; + if (child_event->parent) { + raw_spin_lock_irq(&child_ctx->lock); + perf_group_detach(child_event); + raw_spin_unlock_irq(&child_ctx->lock); + } perf_remove_from_context(child_event); - parent_event = child_event->parent; /* - * It can happen that parent exits first, and has events + * It can happen that the parent exits first, and has events * that are still around due to the child reference. These - * events need to be zapped - but otherwise linger. + * events need to be zapped. */ - if (parent_event) { + if (child_event->parent) { sync_child_event(child_event, child); free_event(child_event); } From c814d160f61466ea6d4491bb4e8e548856e27a58 Mon Sep 17 00:00:00 2001 From: Heinz Graalfs Date: Tue, 15 Feb 2011 13:02:14 -0500 Subject: [PATCH 27/30] oprofile, s390: Remove hwsampler_files.c and merge it into init.c Merge the contents of hwsampler_files.c into arch/s390/oprofile/init.c. 
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Heinz Graalfs Signed-off-by: Robert Richter --- arch/s390/oprofile/Makefile | 2 +- arch/s390/oprofile/hwsampler_files.c | 162 --------------------------- arch/s390/oprofile/init.c | 161 +++++++++++++++++++++++++- 3 files changed, 159 insertions(+), 166 deletions(-) delete mode 100644 arch/s390/oprofile/hwsampler_files.c diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 00d8fc8e4429..d698cddcfbdd 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,4 +6,4 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o hwsampler.o hwsampler_files.o +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o hwsampler.o diff --git a/arch/s390/oprofile/hwsampler_files.c b/arch/s390/oprofile/hwsampler_files.c deleted file mode 100644 index 2e1da2449ba9..000000000000 --- a/arch/s390/oprofile/hwsampler_files.c +++ /dev/null @@ -1,162 +0,0 @@ -/** - * arch/s390/oprofile/hwsampler_files.c - * - * Copyright IBM Corp. 2010 - * Author: Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) - */ -#include -#include -#include - -#include "../../../drivers/oprofile/oprof.h" -#include "hwsampler.h" - -#define DEFAULT_INTERVAL 4096 - -#define DEFAULT_SDBT_BLOCKS 1 -#define DEFAULT_SDB_BLOCKS 511 - -static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; -static unsigned long oprofile_min_interval; -static unsigned long oprofile_max_interval; - -static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; -static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; - -static int hwsampler_file; -static int hwsampler_running; /* start_mutex must be held to change */ - -static struct oprofile_operations timer_ops; - -static int oprofile_hwsampler_start(void) -{ - int retval; - - hwsampler_running = hwsampler_file; - - if (!hwsampler_running) - return timer_ops.start(); - - retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); - if (retval) - return retval; - - retval = hwsampler_start_all(oprofile_hw_interval); - if (retval) - hwsampler_deallocate(); - - return retval; -} - -static void oprofile_hwsampler_stop(void) -{ - if (!hwsampler_running) { - timer_ops.stop(); - return; - } - - hwsampler_stop_all(); - hwsampler_deallocate(); - return; -} - -static ssize_t hwsampler_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); -} - -static ssize_t hwsampler_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) - return retval; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_file = val; - - return count; -} - -static const struct file_operations hwsampler_fops = { - .read = hwsampler_read, - .write = hwsampler_write, -}; - -static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) -{ - struct dentry *hw_dir; - - /* reinitialize default values */ - hwsampler_file = 1; - - hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); - if (!hw_dir) - return -EINVAL; - - oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_ulong(sb, hw_dir, "hw_interval", - &oprofile_hw_interval); 
- oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - - return 0; -} - -int oprofile_hwsampler_init(struct oprofile_operations* ops) -{ - if (hwsampler_setup()) - return -ENODEV; - - /* - * create hwsampler files only if hwsampler_setup() succeeds. - */ - oprofile_min_interval = hwsampler_query_min_interval(); - if (oprofile_min_interval < 0) { - oprofile_min_interval = 0; - return -ENODEV; - } - oprofile_max_interval = hwsampler_query_max_interval(); - if (oprofile_max_interval < 0) { - oprofile_max_interval = 0; - return -ENODEV; - } - - if (oprofile_timer_init(ops)) - return -ENODEV; - - printk(KERN_INFO "oprofile: using hardware sampling\n"); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); - - ops->start = oprofile_hwsampler_start; - ops->stop = oprofile_hwsampler_stop; - ops->create_files = oprofile_create_hwsampling_files; - - return 0; -} - -void oprofile_hwsampler_exit(void) -{ - oprofile_timer_exit(); - hwsampler_shutdown(); -} diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 059b44b9f171..0e38a5b8793f 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -4,19 +4,174 @@ * S390 Version * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) + * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2011 OProfile authors */ #include #include #include +#include +#include +#include -extern int oprofile_hwsampler_init(struct oprofile_operations* ops); -extern void oprofile_hwsampler_exit(void); +#include "../../../drivers/oprofile/oprof.h" +#include "hwsampler.h" + +#define DEFAULT_INTERVAL 4096 + +#define DEFAULT_SDBT_BLOCKS 1 +#define DEFAULT_SDB_BLOCKS 511 + +static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; +static unsigned long oprofile_min_interval; +static unsigned long oprofile_max_interval; + +static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; +static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; + +static int hwsampler_file; +static int hwsampler_running; /* start_mutex must be held to change */ + +static struct oprofile_operations timer_ops; extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); +static int oprofile_hwsampler_start(void) +{ + int retval; + + hwsampler_running = hwsampler_file; + + if (!hwsampler_running) + return timer_ops.start(); + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + +static void oprofile_hwsampler_stop(void) +{ + if (!hwsampler_running) { + timer_ops.stop(); + return; + } + + hwsampler_stop_all(); + hwsampler_deallocate(); + return; +} + +static ssize_t hwsampler_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); +} + +static ssize_t hwsampler_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return 
retval; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_file = val; + + return count; +} + +static const struct file_operations hwsampler_fops = { + .read = hwsampler_read, + .write = hwsampler_write, +}; + +static int oprofile_create_hwsampling_files(struct super_block *sb, + struct dentry *root) +{ + struct dentry *hw_dir; + + /* reinitialize default values */ + hwsampler_file = 1; + + hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!hw_dir) + return -EINVAL; + + oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); + oprofilefs_create_ulong(sb, hw_dir, "hw_interval", + &oprofile_hw_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + + return 0; +} + +int oprofile_hwsampler_init(struct oprofile_operations* ops) +{ + if (hwsampler_setup()) + return -ENODEV; + + /* + * create hwsampler files only if hwsampler_setup() succeeds. + */ + oprofile_min_interval = hwsampler_query_min_interval(); + if (oprofile_min_interval < 0) { + oprofile_min_interval = 0; + return -ENODEV; + } + oprofile_max_interval = hwsampler_query_max_interval(); + if (oprofile_max_interval < 0) { + oprofile_max_interval = 0; + return -ENODEV; + } + + if (oprofile_timer_init(ops)) + return -ENODEV; + + printk(KERN_INFO "oprofile: using hardware sampling\n"); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + ops->create_files = oprofile_create_hwsampling_files; + + return 0; +} + +void oprofile_hwsampler_exit(void) +{ + oprofile_timer_exit(); + hwsampler_shutdown(); +} + int __init oprofile_arch_init(struct oprofile_operations* ops) { ops->backtrace = s390_backtrace; From ec6b426c4dbb9eef40375c389746cab7e931a584 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 16 Mar 2011 12:10:12 +0100 Subject: [PATCH 28/30] oprofile, s390: Cleanups Remove unused HAVE_HWSAMPLER config option. It is not used anymore, removing it. Also make some functions static and some coding style fixes. 
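For context on the code now living in init.c: it keeps the default timer-based oprofile_operations in a private copy and only falls back to it when hardware sampling is switched off via the hwsampler file. A stripped-down sketch of that save-and-override pattern, using hypothetical names rather than the kernel's structs:

#include <stdio.h>

struct sampler_ops {
	int (*start)(void);
	void (*stop)(void);
};

static int timer_start(void) { puts("timer sampling"); return 0; }
static void timer_stop(void) { }

static struct sampler_ops fallback_ops;	/* saved default implementation */
static int hw_enabled;

static int hw_start(void)
{
	if (!hw_enabled)
		return fallback_ops.start();	/* delegate to the timer path */
	puts("hardware sampling");
	return 0;
}

static void install(struct sampler_ops *ops)
{
	fallback_ops = *ops;	/* keep the timer implementation around */
	ops->start = hw_start;	/* then take over the entry point */
}

int main(void)
{
	struct sampler_ops ops = { .start = timer_start, .stop = timer_stop };

	install(&ops);
	hw_enabled = 0;
	ops.start();	/* -> timer sampling */
	hw_enabled = 1;
	ops.start();	/* -> hardware sampling */
	return 0;
}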
Signed-off-by: Robert Richter --- arch/Kconfig | 3 --- arch/s390/Kconfig | 1 - arch/s390/oprofile/init.c | 6 +++--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 43abf3c6da8e..f78c2be4242b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,9 +30,6 @@ config OPROFILE_EVENT_MULTIPLEX config HAVE_OPROFILE bool -config HAVE_HWSAMPLER - bool - config KPROBES bool "Kprobes" depends on MODULES diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0cf20adfbb45..ff19efdf6fef 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -115,7 +115,6 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE - select HAVE_HWSAMPLER config SCHED_OMIT_FRAME_POINTER def_bool y diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 0e38a5b8793f..16c76def4a9d 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -133,7 +133,7 @@ static int oprofile_create_hwsampling_files(struct super_block *sb, return 0; } -int oprofile_hwsampler_init(struct oprofile_operations* ops) +static int oprofile_hwsampler_init(struct oprofile_operations *ops) { if (hwsampler_setup()) return -ENODEV; @@ -166,13 +166,13 @@ int oprofile_hwsampler_init(struct oprofile_operations* ops) return 0; } -void oprofile_hwsampler_exit(void) +static void oprofile_hwsampler_exit(void) { oprofile_timer_exit(); hwsampler_shutdown(); } -int __init oprofile_arch_init(struct oprofile_operations* ops) +int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; From 13e5befaddcf8d542ae45610b552105490a0010b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 16 Mar 2011 17:17:08 -0700 Subject: [PATCH 29/30] trace, documentation: Fix branch profiling location in debugfs The debugfs interface for branch profiling is through /sys/kernel/debug/tracing/trace_stat/branch_annotated /sys/kernel/debug/tracing/trace_stat/branch_all so update the Kconfig accordingly. Signed-off-by: David Rientjes Cc: Steven Rostedt LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 14674dce77a6..61d7d59f4a1a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -275,7 +275,7 @@ config PROFILE_ANNOTATED_BRANCHES This tracer profiles all the the likely and unlikely macros in the kernel. It will display the results in: - /sys/kernel/debug/tracing/profile_annotated_branch + /sys/kernel/debug/tracing/trace_stat/branch_annotated Note: this will add a significant overhead; only turn this on if you need to profile the system's use of these macros. @@ -288,7 +288,7 @@ config PROFILE_ALL_BRANCHES taken in the kernel is recorded whether it hit or miss. The results will be displayed in: - /sys/kernel/debug/tracing/profile_branch + /sys/kernel/debug/tracing/trace_stat/branch_all This option also enables the likely/unlikely profiler. From 1ef1d1c2353967e2d61ecaddf76edfd058a778b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Mar 2011 14:41:27 +0100 Subject: [PATCH 30/30] trace, filters: Initialize the match variable in process_ops() properly Make sure the 'match' variable always has a value. 
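The bug class addressed here is an accumulator that is only assigned inside a loop: if the loop body never runs, or breaks before the first assignment, the caller sees whatever happened to be on the stack. A reduced sketch with hypothetical names, not the actual filter-predicate code:

#include <stdio.h>

/* Hypothetical stand-in for evaluating a predicate's children. */
static int eval_children(const int *results, int n)
{
	int match = 0;	/* without this initializer, n == 0 returns garbage */
	int i;

	for (i = 0; i < n; i++) {
		match = results[i];
		if (!match)	/* short-circuit on the first miss (AND semantics) */
			break;
	}
	return match;
}

int main(void)
{
	int some[] = { 1, 1, 0, 1 };

	printf("%d %d\n", eval_children(some, 4), eval_children(NULL, 0));
	return 0;
}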
Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 3249b4f77ef0..8008ddcfbf20 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -391,8 +391,8 @@ static int process_ops(struct filter_pred *preds, struct filter_pred *op, void *rec) { struct filter_pred *pred; + int match = 0; int type; - int match; int i; /*