sched/numa: Find an alternative idle CPU if the CPU is part of an active NUMA balance

Multiple tasks can attempt to select an idle CPU but fail because
numa_migrate_on is already set and the migration fails. Instead of failing,
scan for an alternative idle CPU. select_idle_sibling is not used because
it requires IRQs to be disabled and it ignores numa_migrate_on allowing
multiple tasks to stack. Due to races, this scan may still fail even if
there are idle candidate CPUs, but if this occurs, it's better that a task
stay on an available CPU than move to a contended one.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Phil Auld <pauld@redhat.com>
Cc: Hillf Danton <hdanton@sina.com>
Link: https://lore.kernel.org/r/20200224095223.13361-12-mgorman@techsingularity.net
This commit is contained in:
Mel Gorman 2020-02-24 09:52:21 +00:00 committed by Ingo Molnar
parent ff7db0bf24
commit 5fb52dd93a

View file

@ -1624,15 +1624,34 @@ static void task_numa_assign(struct task_numa_env *env,
{
struct rq *rq = cpu_rq(env->dst_cpu);
/* Bail out if run-queue part of active NUMA balance. */
if (env->best_cpu != env->dst_cpu && xchg(&rq->numa_migrate_on, 1))
return;
/* Check if run-queue part of active NUMA balance. */
if (env->best_cpu != env->dst_cpu && xchg(&rq->numa_migrate_on, 1)) {
int cpu;
int start = env->dst_cpu;
/* Find alternative idle CPU. */
for_each_cpu_wrap(cpu, cpumask_of_node(env->dst_nid), start) {
if (cpu == env->best_cpu || !idle_cpu(cpu) ||
!cpumask_test_cpu(cpu, env->p->cpus_ptr)) {
continue;
}
env->dst_cpu = cpu;
rq = cpu_rq(env->dst_cpu);
if (!xchg(&rq->numa_migrate_on, 1))
goto assign;
}
/* Failed to find an alternative idle CPU */
return;
}
assign:
/*
* Clear previous best_cpu/rq numa-migrate flag, since task now
* found a better CPU to move/swap.
*/
if (env->best_cpu != -1) {
if (env->best_cpu != -1 && env->best_cpu != env->dst_cpu) {
rq = cpu_rq(env->best_cpu);
WRITE_ONCE(rq->numa_migrate_on, 0);
}
@ -1806,21 +1825,6 @@ assign:
cpu = env->best_cpu;
}
/*
* Use select_idle_sibling if the previously found idle CPU is
* not idle any more.
*/
if (!idle_cpu(cpu)) {
/*
* select_idle_siblings() uses an per-CPU cpumask that
* can be used from IRQ context.
*/
local_irq_disable();
cpu = select_idle_sibling(env->p, env->src_cpu,
env->dst_cpu);
local_irq_enable();
}
env->dst_cpu = cpu;
}