perf/cgroup: Order events in RB tree by cgroup id

If one is monitoring 6 events on 20 cgroups the per-CPU RB tree will
hold 120 events. The scheduling in of the events currently iterates
over all events looking to see which events match the task's cgroup or
its cgroup hierarchy. If a task is in 1 cgroup with 6 events, then 114
events are considered unnecessarily.

This change orders events in the RB tree by cgroup id if it is present.
This means scheduling in may go directly to events associated with the
task's cgroup if one is present. The per-CPU iterator storage in
visit_groups_merge is sized sufficent for an iterator per cgroup depth,
where different iterators are needed for the task's cgroup and parent
cgroups. By considering the set of iterators when visiting, the lowest
group_index event may be selected and the insertion order group_index
property is maintained. This also allows event rotation to function
correctly, as although events are grouped into a cgroup, rotation always
selects the lowest group_index event to rotate (delete/insert into the
tree) and the min heap of iterators make it so that the group_index order
is maintained.

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20190724223746.153620-3-irogers@google.com
This commit is contained in:
Ian Rogers 2020-02-13 23:51:33 -08:00 committed by Ingo Molnar
parent c2283c9368
commit 95ed6c707f

View file

@ -1577,6 +1577,30 @@ perf_event_groups_less(struct perf_event *left, struct perf_event *right)
if (left->cpu > right->cpu)
return false;
#ifdef CONFIG_CGROUP_PERF
if (left->cgrp != right->cgrp) {
if (!left->cgrp || !left->cgrp->css.cgroup) {
/*
* Left has no cgroup but right does, no cgroups come
* first.
*/
return true;
}
if (!right->cgrp || right->cgrp->css.cgroup) {
/*
* Right has no cgroup but left does, no cgroups come
* first.
*/
return false;
}
/* Two dissimilar cgroups, order by id. */
if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id)
return true;
return false;
}
#endif
if (left->group_index < right->group_index)
return true;
if (left->group_index > right->group_index)
@ -1656,25 +1680,48 @@ del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx)
}
/*
* Get the leftmost event in the @cpu subtree.
* Get the leftmost event in the cpu/cgroup subtree.
*/
static struct perf_event *
perf_event_groups_first(struct perf_event_groups *groups, int cpu)
perf_event_groups_first(struct perf_event_groups *groups, int cpu,
struct cgroup *cgrp)
{
struct perf_event *node_event = NULL, *match = NULL;
struct rb_node *node = groups->tree.rb_node;
#ifdef CONFIG_CGROUP_PERF
u64 node_cgrp_id, cgrp_id = 0;
if (cgrp)
cgrp_id = cgrp->kn->id;
#endif
while (node) {
node_event = container_of(node, struct perf_event, group_node);
if (cpu < node_event->cpu) {
node = node->rb_left;
} else if (cpu > node_event->cpu) {
node = node->rb_right;
} else {
match = node_event;
node = node->rb_left;
continue;
}
if (cpu > node_event->cpu) {
node = node->rb_right;
continue;
}
#ifdef CONFIG_CGROUP_PERF
node_cgrp_id = 0;
if (node_event->cgrp && node_event->cgrp->css.cgroup)
node_cgrp_id = node_event->cgrp->css.cgroup->kn->id;
if (cgrp_id < node_cgrp_id) {
node = node->rb_left;
continue;
}
if (cgrp_id > node_cgrp_id) {
node = node->rb_right;
continue;
}
#endif
match = node_event;
node = node->rb_left;
}
return match;
@ -1687,12 +1734,26 @@ static struct perf_event *
perf_event_groups_next(struct perf_event *event)
{
struct perf_event *next;
#ifdef CONFIG_CGROUP_PERF
u64 curr_cgrp_id = 0;
u64 next_cgrp_id = 0;
#endif
next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node);
if (next && next->cpu == event->cpu)
return next;
if (next == NULL || next->cpu != event->cpu)
return NULL;
return NULL;
#ifdef CONFIG_CGROUP_PERF
if (event->cgrp && event->cgrp->css.cgroup)
curr_cgrp_id = event->cgrp->css.cgroup->kn->id;
if (next->cgrp && next->cgrp->css.cgroup)
next_cgrp_id = next->cgrp->css.cgroup->kn->id;
if (curr_cgrp_id != next_cgrp_id)
return NULL;
#endif
return next;
}
/*
@ -3473,6 +3534,9 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
int (*func)(struct perf_event *, void *),
void *data)
{
#ifdef CONFIG_CGROUP_PERF
struct cgroup_subsys_state *css = NULL;
#endif
/* Space for per CPU and/or any CPU event iterators. */
struct perf_event *itrs[2];
struct min_heap event_heap;
@ -3487,6 +3551,11 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
};
lockdep_assert_held(&cpuctx->ctx.lock);
#ifdef CONFIG_CGROUP_PERF
if (cpuctx->cgrp)
css = &cpuctx->cgrp->css;
#endif
} else {
event_heap = (struct min_heap){
.data = itrs,
@ -3494,11 +3563,16 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
.size = ARRAY_SIZE(itrs),
};
/* Events not within a CPU context may be on any CPU. */
__heap_add(&event_heap, perf_event_groups_first(groups, -1));
__heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
}
evt = event_heap.data;
__heap_add(&event_heap, perf_event_groups_first(groups, cpu));
__heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
#ifdef CONFIG_CGROUP_PERF
for (; css; css = css->parent)
__heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
#endif
min_heapify_all(&event_heap, &perf_min_heap);