alistair23-linux/kernel/rtmutex-debug.c
Lai Jiangshan 8161239a8b rtmutex: Simplify PI algorithm and make highest prio task get lock
In the current rtmutex implementation, the pending owner may be boosted by
the tasks on the rtmutex's wait list when the pending owner is deboosted
or when a task on the wait list is boosted. This boosting is spurious,
because the pending owner has not really taken the rtmutex yet.
It is not reasonable.

Example.

time1:
A (high prio) owns the rtmutex.
B (mid prio) and C (low prio) are on the wait list.

time2:
A releases the lock and B becomes the pending owner.
A (or another high prio task) continues to run. B's prio is lower
than A's, so B is just queued on the runqueue.

time3:
A or the other high prio task sleeps, but some time has passed, and
B's and C's prios have changed in the period (time2 ~ time3)
due to boosting or deboosting. Now C's priority is higher
than B's. ***Is it reasonable that C has to boost B and help B
get the rtmutex?

NO!! I think it is unrelated/unneeded boosting before B really
owns the rtmutex. We should give C a chance to beat B and
win the rtmutex.

This is the motivation for this patch. This patch *ensures* that
only the top waiter or a higher-priority task can take the lock.

How?
1) We don't dequeue the top waiter on unlock; if the top waiter
   changes, the old top waiter will fail to acquire and go to sleep again.
2) When acquiring the lock, a task gets it when the lock is not taken and:
   there are no waiters, OR it has higher priority than all waiters, OR it
   is the top waiter (see the sketch after the next paragraph).
3) Any time the top waiter changes, the new top waiter is woken up.

The algorithm is much simpler than before: no pending owner, no
boosting for a pending owner.
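
To illustrate rule 2), here is a minimal user-space sketch of the
acquisition test. It is NOT the patched kernel code; every name in it
is made up for illustration (a lower prio value means higher priority,
as in the kernel):

#include <stdbool.h>

struct fake_waiter {
	int prio;			/* lower value == higher priority */
};

struct fake_rtmutex {
	bool taken;			/* is the lock currently owned? */
	struct fake_waiter *top_waiter;	/* highest-prio waiter, or NULL */
};

/* Take the lock iff it is free and we are "good enough" to have it. */
static bool fake_try_take(struct fake_rtmutex *lock,
			  struct fake_waiter *me, int my_prio)
{
	if (lock->taken)
		return false;		/* already owned by someone */
	if (!lock->top_waiter)
		return true;		/* no waiters at all */
	if (me == lock->top_waiter)
		return true;		/* we are the top waiter */
	/* higher priority than every waiter == higher than the top one */
	return my_prio < lock->top_waiter->prio;
}

A task that loses this race simply stays (or is put back) on the wait
list and sleeps again, which is rule 1) above.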

Other advantages of this patch:
1) The number of possible states of an rtmutex is cut in half, which makes
   the code easier to read.
2) The code becomes shorter.
3) The top waiter is not dequeued until it really takes the lock:
   waiters retain FIFO order when the lock is stolen.

Neither an advantage nor a disadvantage:
1) Even though we may wake up multiple waiters (any time the top waiter
   changes), we hardly cause a "thundering herd";
   the number of woken-up tasks is likely 1 or very small.
2) Two APIs are changed.
   rt_mutex_owner() no longer returns the pending owner; it returns NULL
                    while the top waiter is about to take the lock.
   rt_mutex_next_owner() always returns the top waiter;
                         it will not return NULL if there are waiters,
                         because the top waiter is not dequeued.

   I have fixed the code that uses these APIs.
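
As a hedged sketch of the new semantics (only the two names
rt_mutex_owner() and rt_mutex_next_owner() come from this patch; the
surrounding types and helpers are invented for illustration):

#include <stddef.h>

struct demo_task;			/* stand-in for task_struct */
struct demo_waiter { struct demo_task *task; };

struct demo_rtmutex {
	struct demo_task *owner;	/* NULL: free, possibly being raced for */
	struct demo_waiter *top_waiter;	/* stays queued until it truly owns */
};

/* No more pending owner: NULL while the woken top waiter races for it. */
static struct demo_task *demo_rt_mutex_owner(struct demo_rtmutex *lock)
{
	return lock->owner;
}

/* Always the top waiter; non-NULL whenever anybody is queued. */
static struct demo_task *demo_rt_mutex_next_owner(struct demo_rtmutex *lock)
{
	return lock->top_waiter ? lock->top_waiter->task : NULL;
}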

Needs updating after this patch is accepted:
1) Documentation/*
2) the test case scripts/rt-tester/t4-l2-pi-deboost.tst

Signed-off-by:  Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4D3012D5.4060709@cn.fujitsu.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2011-01-27 21:13:51 -05:00

/*
 * RT-Mutexes: blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner:
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 * This code is based on the rt.c implementation in the preempt-rt tree.
 * Portions of said code are
 *
 *  Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey
 *  Copyright (C) 2006 Esben Nielsen
 *  Copyright (C) 2006 Kihon Technologies Inc.,
 *	Steven Rostedt <rostedt@goodmis.org>
 *
 * See rt.c in preempt-rt for proper credits and further information
 */
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/plist.h>
#include <linux/fs.h>
#include <linux/debug_locks.h>
#include "rtmutex_common.h"
# define TRACE_WARN_ON(x) WARN_ON(x)
# define TRACE_BUG_ON(x) BUG_ON(x)
# define TRACE_OFF() \
do { \
if (rt_trace_on) { \
rt_trace_on = 0; \
console_verbose(); \
if (raw_spin_is_locked(&current->pi_lock)) \
raw_spin_unlock(&current->pi_lock); \
} \
} while (0)
# define TRACE_OFF_NOLOCK() \
do { \
if (rt_trace_on) { \
rt_trace_on = 0; \
console_verbose(); \
} \
} while (0)
# define TRACE_BUG_LOCKED() \
do { \
TRACE_OFF(); \
BUG(); \
} while (0)
# define TRACE_WARN_ON_LOCKED(c) \
do { \
if (unlikely(c)) { \
TRACE_OFF(); \
WARN_ON(1); \
} \
} while (0)
# define TRACE_BUG_ON_LOCKED(c) \
do { \
if (unlikely(c)) \
TRACE_BUG_LOCKED(); \
} while (0)
#ifdef CONFIG_SMP
# define SMP_TRACE_BUG_ON_LOCKED(c) TRACE_BUG_ON_LOCKED(c)
#else
# define SMP_TRACE_BUG_ON_LOCKED(c) do { } while (0)
#endif

/*
 * deadlock detection flag. We turn it off when we detect
 * the first problem because we don't want to recurse back
 * into the tracing code when doing error printk or
 * executing a BUG():
 */
static int rt_trace_on = 1;

static void printk_task(struct task_struct *p)
{
	if (p)
		printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
	else
		printk("<none>");
}

static void printk_lock(struct rt_mutex *lock, int print_owner)
{
	if (lock->name)
		printk(" [%p] {%s}\n",
			lock, lock->name);
	else
		printk(" [%p] {%s:%d}\n",
			lock, lock->file, lock->line);

	if (print_owner && rt_mutex_owner(lock)) {
		printk(".. ->owner: %p\n", lock->owner);
		printk(".. held by: ");
		printk_task(rt_mutex_owner(lock));
		printk("\n");
	}
}

void rt_mutex_debug_task_free(struct task_struct *task)
{
	WARN_ON(!plist_head_empty(&task->pi_waiters));
	WARN_ON(task->pi_blocked_on);
}

/*
 * We fill out the fields in the waiter to store the information about
 * the deadlock. We print when we return. act_waiter can be NULL in
 * case of a remove waiter operation.
 */
void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter,
			     struct rt_mutex *lock)
{
	struct task_struct *task;

	if (!rt_trace_on || detect || !act_waiter)
		return;

	task = rt_mutex_owner(act_waiter->lock);
	if (task && task != current) {
		act_waiter->deadlock_task_pid = get_pid(task_pid(task));
		act_waiter->deadlock_lock = lock;
	}
}

void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
{
	struct task_struct *task;

	if (!waiter->deadlock_lock || !rt_trace_on)
		return;

	rcu_read_lock();
	task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID);
	if (!task) {
		rcu_read_unlock();
		return;
	}

	TRACE_OFF_NOLOCK();

	printk("\n============================================\n");
	printk( "[ BUG: circular locking deadlock detected! ]\n");
	printk( "--------------------------------------------\n");
	printk("%s/%d is deadlocking current task %s/%d\n\n",
	       task->comm, task_pid_nr(task),
	       current->comm, task_pid_nr(current));

	printk("\n1) %s/%d is trying to acquire this lock:\n",
	       current->comm, task_pid_nr(current));
	printk_lock(waiter->lock, 1);

	printk("\n2) %s/%d is blocked on this lock:\n",
	       task->comm, task_pid_nr(task));
	printk_lock(waiter->deadlock_lock, 1);

	debug_show_held_locks(current);
	debug_show_held_locks(task);

	printk("\n%s/%d's [blocked] stackdump:\n\n",
	       task->comm, task_pid_nr(task));
	show_stack(task, NULL);

	printk("\n%s/%d's [current] stackdump:\n\n",
	       current->comm, task_pid_nr(current));
	dump_stack();

	debug_show_all_locks();
	rcu_read_unlock();

	printk("[ turning off deadlock detection. "
	       "Please report this trace. ]\n\n");
	local_irq_disable();
}

void debug_rt_mutex_lock(struct rt_mutex *lock)
{
}

void debug_rt_mutex_unlock(struct rt_mutex *lock)
{
	TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
}

void
debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
{
}

void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
	TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
}
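
/*
 * The memset() below fills the waiter with a recognisable poison pattern
 * (0x11), so any field that is used without being explicitly set up
 * afterwards is easy to spot in debug output.
 */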
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
	memset(waiter, 0x11, sizeof(*waiter));
	plist_node_init(&waiter->list_entry, MAX_PRIO);
	plist_node_init(&waiter->pi_list_entry, MAX_PRIO);
	waiter->deadlock_task_pid = NULL;
}
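
/*
 * The waiter must no longer be enqueued on any list here; poisoning it
 * with 0x22 makes a later use-after-free stand out in debug output.
 */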
void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
{
	put_pid(waiter->deadlock_task_pid);
	TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
	TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
	memset(waiter, 0x22, sizeof(*waiter));
}

void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	lock->name = name;
}

void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
{
}

void rt_mutex_deadlock_account_unlock(struct task_struct *task)
{
}