perf: Register PMU implementations

Add a simple registration interface for struct pmu. This provides the
infrastructure for removing all the weak functions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra 2010-06-11 13:35:08 +02:00 committed by Ingo Molnar
parent 51b0fe3954
commit b0a873ebbf
10 changed files with 506 additions and 430 deletions

View file

@ -642,35 +642,40 @@ static int __hw_perf_event_init(struct perf_event *event)
return 0;
}
/*
 * event_init callback for the Alpha PMU: the main entry point for
 * initialising a HW performance event.
 *
 * Returns -ENOENT for any event type other than RAW, HARDWARE or HW_CACHE,
 * -ENODEV when alpha_pmu has not been set, and otherwise the result of
 * __hw_perf_event_init().
 */
static int alpha_pmu_event_init(struct perf_event *event)
{
	if (event->attr.type != PERF_TYPE_RAW &&
	    event->attr.type != PERF_TYPE_HARDWARE &&
	    event->attr.type != PERF_TYPE_HW_CACHE)
		return -ENOENT;

	if (!alpha_pmu)
		return -ENODEV;

	/* Do the real initialisation work. */
	return __hw_perf_event_init(event);
}
/*
 * Callback table for the Alpha PMU; passed to perf_pmu_register() from
 * init_hw_perf_events().
 */
static struct pmu pmu = {
.event_init = alpha_pmu_event_init,
.enable = alpha_pmu_enable,
.disable = alpha_pmu_disable,
.read = alpha_pmu_read,
.unthrottle = alpha_pmu_unthrottle,
};
/*
* Main entry point to initialise a HW performance event.
*/
struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err;
if (!alpha_pmu)
return ERR_PTR(-ENODEV);
/* Do the real initialisation work. */
err = __hw_perf_event_init(event);
if (err)
return ERR_PTR(err);
return &pmu;
}
/*
* Main entry point - enable HW performance counters.
*/
@ -838,5 +843,7 @@ void __init init_hw_perf_events(void)
/* And set up PMU specification */
alpha_pmu = &ev67_pmu;
perf_max_events = alpha_pmu->num_pmcs;
perf_pmu_register(&pmu);
}

View file

@ -306,12 +306,7 @@ out:
return err;
}
static struct pmu pmu = {
.enable = armpmu_enable,
.disable = armpmu_disable,
.unthrottle = armpmu_unthrottle,
.read = armpmu_read,
};
static struct pmu pmu;
static int
validate_event(struct cpu_hw_events *cpuc,
@ -491,20 +486,29 @@ __hw_perf_event_init(struct perf_event *event)
return err;
}
struct pmu *
hw_perf_event_init(struct perf_event *event)
static int armpmu_event_init(struct perf_event *event)
{
int err = 0;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
if (!armpmu)
return ERR_PTR(-ENODEV);
return -ENODEV;
event->destroy = hw_perf_event_destroy;
if (!atomic_inc_not_zero(&active_events)) {
if (atomic_read(&active_events) > perf_max_events) {
atomic_dec(&active_events);
return ERR_PTR(-ENOSPC);
return -ENOSPC;
}
mutex_lock(&pmu_reserve_mutex);
@ -518,15 +522,23 @@ hw_perf_event_init(struct perf_event *event)
}
if (err)
return ERR_PTR(err);
return err;
err = __hw_perf_event_init(event);
if (err)
hw_perf_event_destroy(event);
return err ? ERR_PTR(err) : &pmu;
return err;
}
/*
 * Callback table for the armpmu implementation; passed to
 * perf_pmu_register() from init_hw_perf_events().
 */
static struct pmu pmu = {
.event_init = armpmu_event_init,
.enable = armpmu_enable,
.disable = armpmu_disable,
.unthrottle = armpmu_unthrottle,
.read = armpmu_read,
};
void
hw_perf_enable(void)
{
@ -2994,6 +3006,8 @@ init_hw_perf_events(void)
perf_max_events = -1;
}
perf_pmu_register(&pmu);
return 0;
}
arch_initcall(init_hw_perf_events);

View file

@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu)
return 0;
}
struct pmu power_pmu = {
.enable = power_pmu_enable,
.disable = power_pmu_disable,
.read = power_pmu_read,
.unthrottle = power_pmu_unthrottle,
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
};
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0;
}
struct pmu *hw_perf_event_init(struct perf_event *event)
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
unsigned long flags;
@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
struct cpu_hw_events *cpuhw;
if (!ppmu)
return ERR_PTR(-ENXIO);
return -ENOENT;
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return ERR_PTR(-EOPNOTSUPP);
return -EOPNOTSUPP;
ev = ppmu->generic_events[ev];
break;
case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev);
if (err)
return ERR_PTR(err);
return err;
break;
case PERF_TYPE_RAW:
ev = event->attr.config;
break;
default:
return ERR_PTR(-EINVAL);
return -ENOENT;
}
event->hw.config_base = ev;
event->hw.idx = 0;
@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
*/
ev = normal_pmc_alternative(ev, flags);
if (!ev)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
}
@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
n = collect_events(event->group_leader, ppmu->n_counter - 1,
ctrs, events, cflags);
if (n < 0)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
events[n] = ev;
ctrs[n] = event;
cflags[n] = flags;
if (check_excludes(ctrs, cflags, n, 1))
return ERR_PTR(-EINVAL);
return -EINVAL;
cpuhw = &get_cpu_var(cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
put_cpu_var(cpu_hw_events);
if (err)
return ERR_PTR(-EINVAL);
return -EINVAL;
event->hw.config = events[n];
event->hw.event_base = cflags[n];
@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
}
event->destroy = hw_perf_event_destroy;
if (err)
return ERR_PTR(err);
return &power_pmu;
return err;
}
/*
 * Callback table for the powerpc PMU, including the start/cancel/commit
 * transaction hooks; passed to perf_pmu_register() from
 * register_power_pmu().
 */
struct pmu power_pmu = {
.event_init = power_pmu_event_init,
.enable = power_pmu_enable,
.disable = power_pmu_disable,
.read = power_pmu_read,
.unthrottle = power_pmu_unthrottle,
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
};
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu)
freeze_events_kernel = MMCR0_FCHV;
#endif /* CONFIG_PPC64 */
perf_pmu_register(&power_pmu);
perf_cpu_notifier(power_pmu_notifier);
return 0;

View file

@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)
local_irq_restore(flags);
}
static struct pmu fsl_emb_pmu = {
.enable = fsl_emb_pmu_enable,
.disable = fsl_emb_pmu_disable,
.read = fsl_emb_pmu_read,
.unthrottle = fsl_emb_pmu_unthrottle,
};
/*
* Release the PMU if this is the last perf_event.
*/
@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0;
}
struct pmu *hw_perf_event_init(struct perf_event *event)
static int fsl_emb_pmu_event_init(struct perf_event *event)
{
u64 ev;
struct perf_event *events[MAX_HWEVENTS];
@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE:
ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return ERR_PTR(-EOPNOTSUPP);
return -EOPNOTSUPP;
ev = ppmu->generic_events[ev];
break;
case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev);
if (err)
return ERR_PTR(err);
return err;
break;
case PERF_TYPE_RAW:
@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
break;
default:
return ERR_PTR(-EINVAL);
return -ENOENT;
}
event->hw.config = ppmu->xlate_event(ev);
if (!(event->hw.config & FSL_EMB_EVENT_VALID))
return ERR_PTR(-EINVAL);
return -EINVAL;
/*
* If this is in a group, check if it can go on with all the
@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
n = collect_events(event->group_leader,
ppmu->n_counter - 1, events);
if (n < 0)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
}
if (num_restricted >= ppmu->n_restricted)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
event->hw.idx = -1;
@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
if (event->attr.exclude_kernel)
event->hw.config_base |= PMLCA_FCS;
if (event->attr.exclude_idle)
return ERR_PTR(-ENOTSUPP);
return -ENOTSUPP;
event->hw.last_period = event->hw.sample_period;
local64_set(&event->hw.period_left, event->hw.last_period);
@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
}
event->destroy = hw_perf_event_destroy;
if (err)
return ERR_PTR(err);
return &fsl_emb_pmu;
return err;
}
/*
 * Callback table for the FSL embedded PMU; passed to perf_pmu_register()
 * from register_fsl_emb_pmu().
 */
static struct pmu fsl_emb_pmu = {
.event_init = fsl_emb_pmu_event_init,
.enable = fsl_emb_pmu_enable,
.disable = fsl_emb_pmu_disable,
.read = fsl_emb_pmu_read,
.unthrottle = fsl_emb_pmu_unthrottle,
};
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
pr_info("%s performance monitor hardware support registered\n",
pmu->name);
perf_pmu_register(&fsl_emb_pmu);
return 0;
}

View file

@ -257,26 +257,38 @@ static void sh_pmu_read(struct perf_event *event)
sh_perf_event_update(event, &event->hw, event->hw.idx);
}
/*
 * event_init callback for the SH PMU.
 *
 * Only RAW, HW_CACHE and HARDWARE events are accepted; every other type
 * yields -ENOENT.  When __hw_perf_event_init() fails and the event has a
 * destroy hook set, the hook is invoked before the error is returned.
 */
static int sh_pmu_event_init(struct perf_event *event)
{
	int ret;

	if (event->attr.type != PERF_TYPE_RAW &&
	    event->attr.type != PERF_TYPE_HW_CACHE &&
	    event->attr.type != PERF_TYPE_HARDWARE)
		return -ENOENT;

	ret = __hw_perf_event_init(event);
	if (unlikely(ret) && event->destroy)
		event->destroy(event);

	return ret;
}
/*
 * Callback table for the SH PMU; passed to perf_pmu_register() from
 * register_sh_pmu().
 */
static struct pmu pmu = {
.event_init = sh_pmu_event_init,
.enable = sh_pmu_enable,
.disable = sh_pmu_disable,
.read = sh_pmu_read,
};
struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err = __hw_perf_event_init(event);
if (unlikely(err)) {
if (event->destroy)
event->destroy(event);
return ERR_PTR(err);
}
return &pmu;
}
static void sh_pmu_setup(int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
memset(cpuhw, 0, sizeof(struct cpu_hw_events));
@ -325,6 +337,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
WARN_ON(pmu->num_events > MAX_HWEVENTS);
perf_pmu_register(&pmu);
perf_cpu_notifier(sh_pmu_notifier);
return 0;
}

View file

@ -1025,7 +1025,7 @@ out:
return ret;
}
static int __hw_perf_event_init(struct perf_event *event)
static int sparc_pmu_event_init(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
struct perf_event *evts[MAX_HWEVENTS];
@ -1038,17 +1038,27 @@ static int __hw_perf_event_init(struct perf_event *event)
if (atomic_read(&nmi_active) < 0)
return -ENODEV;
if (attr->type == PERF_TYPE_HARDWARE) {
switch (attr->type) {
case PERF_TYPE_HARDWARE:
if (attr->config >= sparc_pmu->max_events)
return -EINVAL;
pmap = sparc_pmu->event_map(attr->config);
} else if (attr->type == PERF_TYPE_HW_CACHE) {
break;
case PERF_TYPE_HW_CACHE:
pmap = sparc_map_cache_event(attr->config);
if (IS_ERR(pmap))
return PTR_ERR(pmap);
} else
break;
case PERF_TYPE_RAW:
return -EOPNOTSUPP;
default:
return -ENOENT;
}
/* We save the enable bits in the config_base. */
hwc->config_base = sparc_pmu->irq_bit;
if (!attr->exclude_user)
@ -1143,6 +1153,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu)
}
static struct pmu pmu = {
.event_init = sparc_pmu_event_init,
.enable = sparc_pmu_enable,
.disable = sparc_pmu_disable,
.read = sparc_pmu_read,
@ -1152,15 +1163,6 @@ static struct pmu pmu = {
.commit_txn = sparc_pmu_commit_txn,
};
struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err = __hw_perf_event_init(event);
if (err)
return ERR_PTR(err);
return &pmu;
}
void perf_event_print_debug(void)
{
unsigned long flags;
@ -1280,6 +1282,7 @@ void __init init_hw_perf_events(void)
/* All sparc64 PMUs currently have 2 events. */
perf_max_events = 2;
perf_pmu_register(&pmu);
register_die_notifier(&perf_event_nmi_notifier);
}

View file

@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
/*
* Setup the hardware configuration for a given attr_type
*/
static int __hw_perf_event_init(struct perf_event *event)
static int __x86_pmu_event_init(struct perf_event *event)
{
int err;
@ -1414,6 +1414,7 @@ void __init init_hw_perf_events(void)
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
perf_pmu_register(&pmu);
perf_cpu_notifier(x86_pmu_notifier);
}
@ -1483,18 +1484,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
return 0;
}
static struct pmu pmu = {
.enable = x86_pmu_enable,
.disable = x86_pmu_disable,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.unthrottle = x86_pmu_unthrottle,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
};
/*
* validate that we can schedule this event
*/
@ -1569,12 +1558,22 @@ out:
return ret;
}
struct pmu *hw_perf_event_init(struct perf_event *event)
int x86_pmu_event_init(struct perf_event *event)
{
struct pmu *tmp;
int err;
err = __hw_perf_event_init(event);
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
err = __x86_pmu_event_init(event);
if (!err) {
/*
* we temporarily connect event to its pmu
@ -1594,12 +1593,24 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
if (err) {
if (event->destroy)
event->destroy(event);
return ERR_PTR(err);
}
return &pmu;
return err;
}
/*
 * Callback table for the x86 PMU, including start/stop and the
 * start/cancel/commit transaction hooks; passed to perf_pmu_register()
 * from init_hw_perf_events().
 */
static struct pmu pmu = {
.event_init = x86_pmu_event_init,
.enable = x86_pmu_enable,
.disable = x86_pmu_disable,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.unthrottle = x86_pmu_unthrottle,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
};
/*
* callchain support
*/

View file

@ -561,6 +561,13 @@ struct perf_event;
* struct pmu - generic performance monitoring unit
*/
struct pmu {
struct list_head entry;
/*
* Should return -ENOENT when the @event doesn't match this pmu
*/
int (*event_init) (struct perf_event *event);
int (*enable) (struct perf_event *event);
void (*disable) (struct perf_event *event);
int (*start) (struct perf_event *event);
@ -849,7 +856,8 @@ struct perf_output_handle {
*/
extern int perf_max_events;
extern struct pmu *hw_perf_event_init(struct perf_event *event);
extern int perf_pmu_register(struct pmu *pmu);
extern void perf_pmu_unregister(struct pmu *pmu);
extern void perf_event_task_sched_in(struct task_struct *task);
extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);

View file

@ -565,6 +565,34 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
.priority = 0x7fffffff
};
/*
 * Destroy hook for breakpoint events (installed by
 * hw_breakpoint_event_init()): hands the event to release_bp_slot().
 */
static void bp_perf_event_destroy(struct perf_event *event)
{
release_bp_slot(event);
}
/*
 * event_init callback for the breakpoint pmu.
 *
 * Returns -ENOENT for anything that is not PERF_TYPE_BREAKPOINT.  Otherwise
 * the event is passed to register_perf_hw_breakpoint(); on success the
 * destroy hook is set to bp_perf_event_destroy() and 0 is returned, on
 * failure the error is returned and the hook is left untouched.
 */
static int hw_breakpoint_event_init(struct perf_event *bp)
{
	int ret;

	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
		return -ENOENT;

	ret = register_perf_hw_breakpoint(bp);
	if (!ret)
		bp->destroy = bp_perf_event_destroy;

	return ret;
}
/*
 * Callback table for hardware breakpoint events; passed to
 * perf_pmu_register() from init_hw_breakpoint().
 */
static struct pmu perf_breakpoint = {
.event_init = hw_breakpoint_event_init,
.enable = arch_install_hw_breakpoint,
.disable = arch_uninstall_hw_breakpoint,
.read = hw_breakpoint_pmu_read,
};
static int __init init_hw_breakpoint(void)
{
unsigned int **task_bp_pinned;
@ -586,6 +614,8 @@ static int __init init_hw_breakpoint(void)
constraints_initialized = 1;
perf_pmu_register(&perf_breakpoint);
return register_die_notifier(&hw_breakpoint_exceptions_nb);
err_alloc:
@ -601,8 +631,3 @@ static int __init init_hw_breakpoint(void)
core_initcall(init_hw_breakpoint);
struct pmu perf_ops_bp = {
.enable = arch_install_hw_breakpoint,
.disable = arch_uninstall_hw_breakpoint,
.read = hw_breakpoint_pmu_read,
};

View file

@ -31,7 +31,6 @@
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/irq_regs.h>
@ -72,14 +71,6 @@ static atomic64_t perf_event_id;
*/
static DEFINE_SPINLOCK(perf_resource_lock);
/*
* Architecture provided APIs - weak aliases:
*/
extern __weak struct pmu *hw_perf_event_init(struct perf_event *event)
{
return NULL;
}
void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event)
return 0;
}
static struct pmu perf_ops_generic = {
.enable = perf_swevent_enable,
.disable = perf_swevent_disable,
.start = perf_swevent_int,
.stop = perf_swevent_void,
.read = perf_swevent_read,
.unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
};
/*
* hrtimer based swevent callback
*/
static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
{
enum hrtimer_restart ret = HRTIMER_RESTART;
struct perf_sample_data data;
struct pt_regs *regs;
struct perf_event *event;
u64 period;
event = container_of(hrtimer, struct perf_event, hw.hrtimer);
event->pmu->read(event);
perf_sample_data_init(&data, 0);
data.period = event->hw.last_period;
regs = get_irq_regs();
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && current->pid == 0))
if (perf_event_overflow(event, 0, &data, regs))
ret = HRTIMER_NORESTART;
}
period = max_t(u64, 10000, event->hw.sample_period);
hrtimer_forward_now(hrtimer, ns_to_ktime(period));
return ret;
}
static void perf_swevent_start_hrtimer(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hwc->hrtimer.function = perf_swevent_hrtimer;
if (hwc->sample_period) {
u64 period;
if (hwc->remaining) {
if (hwc->remaining < 0)
period = 10000;
else
period = hwc->remaining;
hwc->remaining = 0;
} else {
period = max_t(u64, 10000, hwc->sample_period);
}
__hrtimer_start_range_ns(&hwc->hrtimer,
ns_to_ktime(period), 0,
HRTIMER_MODE_REL, 0);
}
}
static void perf_swevent_cancel_hrtimer(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
if (hwc->sample_period) {
ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
hwc->remaining = ktime_to_ns(remaining);
hrtimer_cancel(&hwc->hrtimer);
}
}
/*
* Software event: cpu wall time clock
*/
static void cpu_clock_perf_event_update(struct perf_event *event)
{
int cpu = raw_smp_processor_id();
s64 prev;
u64 now;
now = cpu_clock(cpu);
prev = local64_xchg(&event->hw.prev_count, now);
local64_add(now - prev, &event->count);
}
static int cpu_clock_perf_event_enable(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int cpu = raw_smp_processor_id();
local64_set(&hwc->prev_count, cpu_clock(cpu));
perf_swevent_start_hrtimer(event);
return 0;
}
static void cpu_clock_perf_event_disable(struct perf_event *event)
{
perf_swevent_cancel_hrtimer(event);
cpu_clock_perf_event_update(event);
}
static void cpu_clock_perf_event_read(struct perf_event *event)
{
cpu_clock_perf_event_update(event);
}
static struct pmu perf_ops_cpu_clock = {
.enable = cpu_clock_perf_event_enable,
.disable = cpu_clock_perf_event_disable,
.read = cpu_clock_perf_event_read,
};
/*
* Software event: task time clock
*/
static void task_clock_perf_event_update(struct perf_event *event, u64 now)
{
u64 prev;
s64 delta;
prev = local64_xchg(&event->hw.prev_count, now);
delta = now - prev;
local64_add(delta, &event->count);
}
static int task_clock_perf_event_enable(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
u64 now;
now = event->ctx->time;
local64_set(&hwc->prev_count, now);
perf_swevent_start_hrtimer(event);
return 0;
}
static void task_clock_perf_event_disable(struct perf_event *event)
{
perf_swevent_cancel_hrtimer(event);
task_clock_perf_event_update(event, event->ctx->time);
}
static void task_clock_perf_event_read(struct perf_event *event)
{
u64 time;
if (!in_nmi()) {
update_context_time(event->ctx);
time = event->ctx->time;
} else {
u64 now = perf_clock();
u64 delta = now - event->ctx->timestamp;
time = event->ctx->time + delta;
}
task_clock_perf_event_update(event, time);
}
static struct pmu perf_ops_task_clock = {
.enable = task_clock_perf_event_enable,
.disable = task_clock_perf_event_disable,
.read = task_clock_perf_event_read,
};
/* Deref the hlist from the update side */
static inline struct swevent_hlist *
swevent_hlist_deref(struct perf_cpu_context *cpuctx)
@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event)
return err;
}
#ifdef CONFIG_EVENT_TRACING
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
static struct pmu perf_ops_tracepoint = {
.enable = perf_trace_enable,
.disable = perf_trace_disable,
/*
 * Destroy hook installed by perf_swevent_init() for events without a
 * parent: decrements the perf_swevent_enabled[] counter for this event id
 * and calls swevent_hlist_put().
 */
static void sw_perf_event_destroy(struct perf_event *event)
{
u64 event_id = event->attr.config;
/* The hook is only installed on parentless events, so a parent here is unexpected. */
WARN_ON(event->parent);
atomic_dec(&perf_swevent_enabled[event_id]);
swevent_hlist_put(event);
}
static int perf_swevent_init(struct perf_event *event)
{
int event_id = event->attr.config;
if (event->attr.type != PERF_TYPE_SOFTWARE)
return -ENOENT;
switch (event_id) {
case PERF_COUNT_SW_CPU_CLOCK:
case PERF_COUNT_SW_TASK_CLOCK:
return -ENOENT;
default:
break;
}
if (event_id > PERF_COUNT_SW_MAX)
return -ENOENT;
if (!event->parent) {
int err;
err = swevent_hlist_get(event);
if (err)
return err;
atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy;
}
return 0;
}
static struct pmu perf_swevent = {
.event_init = perf_swevent_init,
.enable = perf_swevent_enable,
.disable = perf_swevent_disable,
.start = perf_swevent_int,
.stop = perf_swevent_void,
.read = perf_swevent_read,
.unthrottle = perf_swevent_void,
.unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
};
#ifdef CONFIG_EVENT_TRACING
static int perf_tp_filter_match(struct perf_event *event,
struct perf_sample_data *data)
{
@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event)
perf_trace_destroy(event);
}
static struct pmu *tp_perf_event_init(struct perf_event *event)
static int perf_tp_event_init(struct perf_event *event)
{
int err;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -ENOENT;
/*
* Raw tracepoint data is a severe data leak, only allow root to
* have these.
@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event)
if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
perf_paranoid_tracepoint_raw() &&
!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
return -EPERM;
err = perf_trace_init(event);
if (err)
return NULL;
return err;
event->destroy = tp_perf_event_destroy;
return &perf_ops_tracepoint;
return 0;
}
/*
 * Callback table for tracepoint events; registered by perf_tp_register().
 * start/stop/read/unthrottle reuse the perf_swevent_* helpers.
 */
static struct pmu perf_tracepoint = {
.event_init = perf_tp_event_init,
.enable = perf_trace_enable,
.disable = perf_trace_disable,
.start = perf_swevent_int,
.stop = perf_swevent_void,
.read = perf_swevent_read,
.unthrottle = perf_swevent_void,
};
/* Registers the tracepoint pmu; the return value of perf_pmu_register() is ignored. */
static inline void perf_tp_register(void)
{
perf_pmu_register(&perf_tracepoint);
}
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event)
#else
static struct pmu *tp_perf_event_init(struct perf_event *event)
static inline void perf_tp_register(void)
{
return NULL;
}
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@ -4913,24 +4791,6 @@ static void perf_event_free_filter(struct perf_event *event)
#endif /* CONFIG_EVENT_TRACING */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
static void bp_perf_event_destroy(struct perf_event *event)
{
release_bp_slot(event);
}
static struct pmu *bp_perf_event_init(struct perf_event *bp)
{
int err;
err = register_perf_hw_breakpoint(bp);
if (err)
return ERR_PTR(err);
bp->destroy = bp_perf_event_destroy;
return &perf_ops_bp;
}
void perf_bp_event(struct perf_event *bp, void *data)
{
struct perf_sample_data sample;
@ -4941,77 +4801,237 @@ void perf_bp_event(struct perf_event *bp, void *data)
if (!perf_exclude_event(bp, regs))
perf_swevent_add(bp, 1, 1, &sample, regs);
}
#else
static struct pmu *bp_perf_event_init(struct perf_event *bp)
{
return NULL;
}
void perf_bp_event(struct perf_event *bp, void *regs)
{
}
#endif
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
/*
* hrtimer based swevent callback
*/
static void sw_perf_event_destroy(struct perf_event *event)
static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
{
u64 event_id = event->attr.config;
enum hrtimer_restart ret = HRTIMER_RESTART;
struct perf_sample_data data;
struct pt_regs *regs;
struct perf_event *event;
u64 period;
WARN_ON(event->parent);
event = container_of(hrtimer, struct perf_event, hw.hrtimer);
event->pmu->read(event);
atomic_dec(&perf_swevent_enabled[event_id]);
swevent_hlist_put(event);
perf_sample_data_init(&data, 0);
data.period = event->hw.last_period;
regs = get_irq_regs();
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && current->pid == 0))
if (perf_event_overflow(event, 0, &data, regs))
ret = HRTIMER_NORESTART;
}
period = max_t(u64, 10000, event->hw.sample_period);
hrtimer_forward_now(hrtimer, ns_to_ktime(period));
return ret;
}
static struct pmu *sw_perf_event_init(struct perf_event *event)
/*
 * Initialise the event's hrtimer and, for sampling events, arm it.
 *
 * The timer is always initialised with perf_swevent_hrtimer() as its
 * handler.  It is only started when hwc->sample_period is non-zero.  The
 * first expiry uses a saved hwc->remaining if there is one (a negative
 * value is replaced by 10000 ns, and the saved value is then cleared);
 * otherwise it uses the sample period, with a floor of 10000 ns.
 */
static void perf_swevent_start_hrtimer(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 period;

	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hwc->hrtimer.function = perf_swevent_hrtimer;

	/* Non-sampling events never start the timer. */
	if (!hwc->sample_period)
		return;

	if (!hwc->remaining) {
		period = max_t(u64, 10000, hwc->sample_period);
	} else {
		if (hwc->remaining < 0)
			period = 10000;
		else
			period = hwc->remaining;
		hwc->remaining = 0;
	}

	__hrtimer_start_range_ns(&hwc->hrtimer,
				 ns_to_ktime(period), 0,
				 HRTIMER_MODE_REL, 0);
}
/*
 * Stop the event's hrtimer for sampling events, first saving the time left
 * until expiry in hwc->remaining so that perf_swevent_start_hrtimer() can
 * pick it up again.  Does nothing when hwc->sample_period is zero.
 */
static void perf_swevent_cancel_hrtimer(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	ktime_t left;

	if (!hwc->sample_period)
		return;

	left = hrtimer_get_remaining(&hwc->hrtimer);
	hwc->remaining = ktime_to_ns(left);
	hrtimer_cancel(&hwc->hrtimer);
}
/*
* Software event: cpu wall time clock
*/
/*
 * Fold the cpu_clock() time elapsed since the last update into the event
 * count: hw.prev_count is swapped for the current reading and the
 * difference is added to event->count.
 */
static void cpu_clock_event_update(struct perf_event *event)
{
	u64 now = cpu_clock(raw_smp_processor_id());
	s64 prev = local64_xchg(&event->hw.prev_count, now);

	local64_add(now - prev, &event->count);
}
/*
 * Enable callback for the cpu-clock event: records the current cpu_clock()
 * reading as the baseline in hw.prev_count and sets up the sampling
 * hrtimer.  Always returns 0.
 */
static int cpu_clock_event_enable(struct perf_event *event)
{
	local64_set(&event->hw.prev_count, cpu_clock(raw_smp_processor_id()));
	perf_swevent_start_hrtimer(event);

	return 0;
}
/*
 * Disable callback for the cpu-clock event: cancels the hrtimer first, then
 * folds the time elapsed so far into the event count.
 */
static void cpu_clock_event_disable(struct perf_event *event)
{
perf_swevent_cancel_hrtimer(event);
cpu_clock_event_update(event);
}
/* Read callback for the cpu-clock event: brings event->count up to date. */
static void cpu_clock_event_read(struct perf_event *event)
{
cpu_clock_event_update(event);
}
/*
 * event_init callback for the cpu-clock pmu: claims (returns 0 for) exactly
 * the PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_CLOCK event and returns
 * -ENOENT for everything else.
 */
static int cpu_clock_event_init(struct perf_event *event)
{
	if (event->attr.type == PERF_TYPE_SOFTWARE &&
	    event->attr.config == PERF_COUNT_SW_CPU_CLOCK)
		return 0;

	return -ENOENT;
}
/* Callback table for the cpu-clock software event; registered in perf_event_init(). */
static struct pmu perf_cpu_clock = {
.event_init = cpu_clock_event_init,
.enable = cpu_clock_event_enable,
.disable = cpu_clock_event_disable,
.read = cpu_clock_event_read,
};
/*
* Software event: task time clock
*/
/*
 * Advance the task-clock event to time @now: hw.prev_count is swapped for
 * @now and the signed difference is added to event->count.
 */
static void task_clock_event_update(struct perf_event *event, u64 now)
{
	u64 prev = local64_xchg(&event->hw.prev_count, now);
	s64 delta = now - prev;

	local64_add(delta, &event->count);
}
/*
 * Enable callback for the task-clock event: uses the context's current time
 * (event->ctx->time) as the baseline in hw.prev_count and sets up the
 * sampling hrtimer.  Always returns 0.
 */
static int task_clock_event_enable(struct perf_event *event)
{
	u64 start = event->ctx->time;

	local64_set(&event->hw.prev_count, start);
	perf_swevent_start_hrtimer(event);

	return 0;
}
/*
 * Disable callback for the task-clock event: cancels the hrtimer first, then
 * updates the count using the context time as it currently stands.
 */
static void task_clock_event_disable(struct perf_event *event)
{
perf_swevent_cancel_hrtimer(event);
task_clock_event_update(event, event->ctx->time);
}
/*
 * Read callback for the task-clock event.
 *
 * Outside NMI context the context time is refreshed with
 * update_context_time() and used directly.  In NMI context the context is
 * not updated; the time is instead computed as ctx->time plus the
 * perf_clock() time elapsed since ctx->timestamp.
 */
static void task_clock_event_read(struct perf_event *event)
{
	u64 time;

	if (in_nmi()) {
		u64 now = perf_clock();

		time = event->ctx->time + (now - event->ctx->timestamp);
	} else {
		update_context_time(event->ctx);
		time = event->ctx->time;
	}

	task_clock_event_update(event, time);
}
/*
 * event_init callback for the task-clock pmu: claims (returns 0 for) exactly
 * the PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK event and returns
 * -ENOENT for everything else.
 */
static int task_clock_event_init(struct perf_event *event)
{
	if (event->attr.type == PERF_TYPE_SOFTWARE &&
	    event->attr.config == PERF_COUNT_SW_TASK_CLOCK)
		return 0;

	return -ENOENT;
}
/* Callback table for the task-clock software event; registered in perf_event_init(). */
static struct pmu perf_task_clock = {
.event_init = task_clock_event_init,
.enable = task_clock_event_enable,
.disable = task_clock_event_disable,
.read = task_clock_event_read,
};
/* List of registered pmus; entries are linked through pmu->entry. */
static LIST_HEAD(pmus);
/* Held around additions to and removals from the pmus list. */
static DEFINE_MUTEX(pmus_lock);
/* SRCU domain; perf_init_event() walks pmus inside a read-side section of it. */
static struct srcu_struct pmus_srcu;
/*
 * Add @pmu to the list of registered pmus.
 *
 * The insertion uses list_add_rcu() under pmus_lock.  Always returns 0.
 */
int perf_pmu_register(struct pmu *pmu)
{
mutex_lock(&pmus_lock);
list_add_rcu(&pmu->entry, &pmus);
mutex_unlock(&pmus_lock);
return 0;
}
/*
 * Remove @pmu from the list of registered pmus.
 *
 * The entry is unlinked with list_del_rcu() under pmus_lock.  After the
 * lock is dropped, synchronize_srcu() is called on pmus_srcu, the domain
 * that list walkers such as perf_init_event() enter with srcu_read_lock().
 */
void perf_pmu_unregister(struct pmu *pmu)
{
mutex_lock(&pmus_lock);
list_del_rcu(&pmu->entry);
mutex_unlock(&pmus_lock);
synchronize_srcu(&pmus_srcu);
}
struct pmu *perf_init_event(struct perf_event *event)
{
struct pmu *pmu = NULL;
u64 event_id = event->attr.config;
int idx;
/*
* Software events (currently) can't in general distinguish
* between user, kernel and hypervisor events.
* However, context switches and cpu migrations are considered
* to be kernel events, and page faults are never hypervisor
* events.
*/
switch (event_id) {
case PERF_COUNT_SW_CPU_CLOCK:
pmu = &perf_ops_cpu_clock;
break;
case PERF_COUNT_SW_TASK_CLOCK:
/*
* If the user instantiates this as a per-cpu event,
* use the cpu_clock event instead.
*/
if (event->ctx->task)
pmu = &perf_ops_task_clock;
else
pmu = &perf_ops_cpu_clock;
break;
case PERF_COUNT_SW_PAGE_FAULTS:
case PERF_COUNT_SW_PAGE_FAULTS_MIN:
case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
case PERF_COUNT_SW_CONTEXT_SWITCHES:
case PERF_COUNT_SW_CPU_MIGRATIONS:
case PERF_COUNT_SW_ALIGNMENT_FAULTS:
case PERF_COUNT_SW_EMULATION_FAULTS:
if (!event->parent) {
int err;
err = swevent_hlist_get(event);
if (err)
return ERR_PTR(err);
atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy;
idx = srcu_read_lock(&pmus_srcu);
list_for_each_entry_rcu(pmu, &pmus, entry) {
int ret = pmu->event_init(event);
if (!ret)
break;
if (ret != -ENOENT) {
pmu = ERR_PTR(ret);
break;
}
pmu = &perf_ops_generic;
break;
}
srcu_read_unlock(&pmus_srcu, idx);
return pmu;
}
@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr,
if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
goto done;
switch (attr->type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
pmu = hw_perf_event_init(event);
break;
pmu = perf_init_event(event);
case PERF_TYPE_SOFTWARE:
pmu = sw_perf_event_init(event);
break;
case PERF_TYPE_TRACEPOINT:
pmu = tp_perf_event_init(event);
break;
case PERF_TYPE_BREAKPOINT:
pmu = bp_perf_event_init(event);
break;
default:
break;
}
done:
err = 0;
if (!pmu)
@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
/*
* This has to have a higher priority than migration_notifier in sched.c.
*/
static struct notifier_block __cpuinitdata perf_cpu_nb = {
.notifier_call = perf_cpu_notify,
.priority = 20,
};
void __init perf_event_init(void)
{
perf_event_init_all_cpus();
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&perf_cpu_nb);
init_srcu_struct(&pmus_srcu);
perf_pmu_register(&perf_swevent);
perf_pmu_register(&perf_cpu_clock);
perf_pmu_register(&perf_task_clock);
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
}
static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,