perf events: Clean up pid passing

The kernel perf event creation path shouldn't use find_task_by_vpid()
because a vpid is only meaningful within a specific pid namespace.
find_task_by_vpid() uses current's pid namespace, which isn't always the
correct namespace for the vpid in all the places where
perf_event_create_kernel_counter() (and thus find_get_context()) is called.

The goal is to clean up pid namespace handling and prevent bugs like:

	https://bugzilla.kernel.org/show_bug.cgi?id=17281

Instead of using pids, switch find_get_context() to use task struct
pointers directly. The syscall is responsible for resolving the pid to
a task struct. This moves the pid namespace resolution into the syscall,
much like every other syscall that takes pid parameters.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robin Green <greenrd@greenrd.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
LKML-Reference: <a134e5e392ab0204961fd1a62c84a222bf5874a9.1284407763.git.matthltc@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Matt Helsley 2010-09-13 13:01:20 -07:00 committed by Ingo Molnar
parent 2ebd4ffb6d
commit 38a81da220
5 changed files with 15 additions and 17 deletions

View file

@ -96,7 +96,7 @@ static int op_create_counter(int cpu, int event)
return ret; return ret;
pevent = perf_event_create_kernel_counter(&counter_config[event].attr, pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
cpu, -1, cpu, NULL,
op_overflow_handler); op_overflow_handler);
if (IS_ERR(pevent)) { if (IS_ERR(pevent)) {

View file

@ -902,7 +902,7 @@ extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event * extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, perf_event_create_kernel_counter(struct perf_event_attr *attr,
int cpu, int cpu,
pid_t pid, struct task_struct *task,
perf_overflow_handler_t callback); perf_overflow_handler_t callback);
extern u64 perf_event_read_value(struct perf_event *event, extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running); u64 *enabled, u64 *running);

View file

@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
perf_overflow_handler_t triggered, perf_overflow_handler_t triggered,
struct task_struct *tsk) struct task_struct *tsk)
{ {
return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk), return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
triggered);
} }
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
get_online_cpus(); get_online_cpus();
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
pevent = per_cpu_ptr(cpu_events, cpu); pevent = per_cpu_ptr(cpu_events, cpu);
bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
*pevent = bp; *pevent = bp;

View file

@ -2053,15 +2053,14 @@ errout:
} }
static struct perf_event_context * static struct perf_event_context *
find_get_context(struct pmu *pmu, pid_t pid, int cpu) find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
{ {
struct perf_event_context *ctx; struct perf_event_context *ctx;
struct perf_cpu_context *cpuctx; struct perf_cpu_context *cpuctx;
struct task_struct *task;
unsigned long flags; unsigned long flags;
int ctxn, err; int ctxn, err;
if (pid == -1 && cpu != -1) { if (!task && cpu != -1) {
/* Must be root to operate on a CPU event: */ /* Must be root to operate on a CPU event: */
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES); return ERR_PTR(-EACCES);
@ -2084,10 +2083,6 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu)
return ctx; return ctx;
} }
task = find_lively_task_by_vpid(pid);
if (IS_ERR(task))
return (void*)task;
err = -EINVAL; err = -EINVAL;
ctxn = pmu->task_ctx_nr; ctxn = pmu->task_ctx_nr;
if (ctxn < 0) if (ctxn < 0)
@ -5527,6 +5522,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_context *ctx; struct perf_event_context *ctx;
struct file *event_file = NULL; struct file *event_file = NULL;
struct file *group_file = NULL; struct file *group_file = NULL;
struct task_struct *task = NULL;
struct pmu *pmu; struct pmu *pmu;
int event_fd; int event_fd;
int fput_needed = 0; int fput_needed = 0;
@ -5581,10 +5577,13 @@ SYSCALL_DEFINE5(perf_event_open,
if ((pmu->task_ctx_nr == perf_sw_context) && group_leader) if ((pmu->task_ctx_nr == perf_sw_context) && group_leader)
pmu = group_leader->pmu; pmu = group_leader->pmu;
if (pid != -1)
task = find_lively_task_by_vpid(pid);
/* /*
* Get the target context (task or percpu): * Get the target context (task or percpu):
*/ */
ctx = find_get_context(pmu, pid, cpu); ctx = find_get_context(pmu, task, cpu);
if (IS_ERR(ctx)) { if (IS_ERR(ctx)) {
err = PTR_ERR(ctx); err = PTR_ERR(ctx);
goto err_group_fd; goto err_group_fd;
@ -5666,11 +5665,11 @@ err_fd:
* *
* @attr: attributes of the counter to create * @attr: attributes of the counter to create
* @cpu: cpu in which the counter is bound * @cpu: cpu in which the counter is bound
* @pid: task to profile * @task: task to profile (NULL for percpu)
*/ */
struct perf_event * struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
pid_t pid, struct task_struct *task,
perf_overflow_handler_t overflow_handler) perf_overflow_handler_t overflow_handler)
{ {
struct perf_event_context *ctx; struct perf_event_context *ctx;
@ -5687,7 +5686,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
goto err; goto err;
} }
ctx = find_get_context(event->pmu, pid, cpu); ctx = find_get_context(event->pmu, task, cpu);
if (IS_ERR(ctx)) { if (IS_ERR(ctx)) {
err = PTR_ERR(ctx); err = PTR_ERR(ctx);
goto err_free; goto err_free;

View file

@ -358,7 +358,7 @@ static int watchdog_nmi_enable(int cpu)
/* Try to register using hardware perf events */ /* Try to register using hardware perf events */
wd_attr = &wd_hw_attr; wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(); wd_attr->sample_period = hw_nmi_get_sample_period();
event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
if (!IS_ERR(event)) { if (!IS_ERR(event)) {
printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
goto out_save; goto out_save;