From 2fe401e38602e853e01376cdb670b0bc4d526a6d Mon Sep 17 00:00:00 2001
From: Adrian Knoth <adi@drcomp.erfurt.thur.de>
Date: Wed, 12 Nov 2008 16:23:55 -0800
Subject: [PATCH 1/4] sched: correct sched-rt-group.txt pathname in
 init/Kconfig

init/Kconfig directs the user to Documentation/sched-rt-group.txt, but
the file is actually in Documentation/scheduler/sched-rt-group.txt.

This patch corrects the pathname mentioned in init/Kconfig.

Signed-off-by: Adrian Knoth <adi@drcomp.erfurt.thur.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 init/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Kconfig b/init/Kconfig
index 86b00c53fade..2f850d800d94 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -354,7 +354,7 @@ config RT_GROUP_SCHED
 	  setting below. If enabled, it will also make it impossible to
 	  schedule realtime tasks for non-root users until you allocate
 	  realtime bandwidth for them.
-	  See Documentation/sched-rt-group.txt for more information.
+	  See Documentation/scheduler/sched-rt-group.txt for more information.
 
 choice
 	depends on GROUP_SCHED

From 29d7b90c15035741d15421b36000509212b3e135 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 16 Nov 2008 08:07:15 +0100
Subject: [PATCH 2/4] sched: fix kernel warning on /proc/sched_debug access

Luis Henriques reported that with CONFIG_PREEMPT=y + CONFIG_PREEMPT_DEBUG=y +
CONFIG_SCHED_DEBUG=y + CONFIG_LATENCYTOP=y enabled, the following warning
triggers when using latencytop:

> [  775.663239] BUG: using smp_processor_id() in preemptible [00000000] code: latencytop/6585
> [  775.663303] caller is native_sched_clock+0x3a/0x80
> [  775.663314] Pid: 6585, comm: latencytop Tainted: G        W 2.6.28-rc4-00355-g9c7c354 #1
> [  775.663322] Call Trace:
> [  775.663343]  [<ffffffff803a94e4>] debug_smp_processor_id+0xe4/0xf0
> [  775.663356]  [<ffffffff80213f7a>] native_sched_clock+0x3a/0x80
> [  775.663368]  [<ffffffff80213e19>] sched_clock+0x9/0x10
> [  775.663381]  [<ffffffff8024550d>] proc_sched_show_task+0x8bd/0x10e0
> [  775.663395]  [<ffffffff8034466e>] sched_show+0x3e/0x80
> [  775.663408]  [<ffffffff8031039b>] seq_read+0xdb/0x350
> [  775.663421]  [<ffffffff80368776>] ? security_file_permission+0x16/0x20
> [  775.663435]  [<ffffffff802f4198>] vfs_read+0xc8/0x170
> [  775.663447]  [<ffffffff802f4335>] sys_read+0x55/0x90
> [  775.663460]  [<ffffffff8020c67a>] system_call_fastpath+0x16/0x1b
> ...

This breakage was caused by me via:

  7cbaef9: sched: optimize sched_clock() a bit

Change the calls to cpu_clock().

Reported-by: Luis Henriques <henrix@sapo.pt>
---
 kernel/sched_debug.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 48ecc51e7701..26ed8e3d1c15 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -423,10 +423,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #undef __P
 
 	{
+		unsigned int this_cpu = raw_smp_processor_id();
 		u64 t0, t1;
 
-		t0 = sched_clock();
-		t1 = sched_clock();
+		t0 = cpu_clock(this_cpu);
+		t1 = cpu_clock(this_cpu);
 		SEQ_printf(m, "%-35s:%21Ld\n",
 			   "clock-delta", (long long)(t1-t0));
 	}

From ad133ba3dc283300e5b62b5b7211d2f39fbf6ee7 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Nov 2008 15:39:47 +0100
Subject: [PATCH 3/4] sched, signals: fix the racy usage of ->signal in
 account_group_xxx/run_posix_cpu_timers

Impact: fix potential NULL dereference

Contrary to ad474caca3e2a0550b7ce0706527ad5ab389a4d4 changelog, other
acct_group_xxx() helpers can be called after exit_notify() by timer tick.
Thanks to Roland for pointing out this. Somehow I missed this simple fact
when I read the original patch, and I am afraid I confused Frank during
the discussion. Sorry.

Fortunately, these helpers work with current, we can check ->exit_state
to ensure that ->signal can't go away under us.

Also, add the comment and compiler barrier to account_group_exec_runtime(),
to make sure we load ->signal only once.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/posix-cpu-timers.c |  7 +++++--
 kernel/sched_stats.h      | 15 +++++++++++----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 153dcb2639c3..895337b16a24 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1308,9 +1308,10 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  */
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-	struct signal_struct *sig = tsk->signal;
+	struct signal_struct *sig;
 
-	if (unlikely(!sig))
+	/* tsk == current, ensure it is safe to use ->signal/sighand */
+	if (unlikely(tsk->exit_state))
 		return 0;
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
@@ -1323,6 +1324,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
 			return 1;
 	}
+
+	sig = tsk->signal;
 	if (!task_cputime_zero(&sig->cputime_expires)) {
 		struct task_cputime group_sample;
 
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index ee71bec1da66..7dbf72a2b02c 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -298,9 +298,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
 {
 	struct signal_struct *sig;
 
-	sig = tsk->signal;
-	if (unlikely(!sig))
+	/* tsk == current, ensure it is safe to use ->signal */
+	if (unlikely(tsk->exit_state))
 		return;
+
+	sig = tsk->signal;
 	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
@@ -325,9 +327,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
 {
 	struct signal_struct *sig;
 
-	sig = tsk->signal;
-	if (unlikely(!sig))
+	/* tsk == current, ensure it is safe to use ->signal */
+	if (unlikely(tsk->exit_state))
 		return;
+
+	sig = tsk->signal;
 	if (sig->cputime.totals) {
 		struct task_cputime *times;
 
@@ -353,8 +357,11 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
 	struct signal_struct *sig;
 
 	sig = tsk->signal;
+	/* see __exit_signal()->task_rq_unlock_wait() */
+	barrier();
 	if (unlikely(!sig))
 		return;
+
 	if (sig->cputime.totals) {
 		struct task_cputime *times;
 

From 700018e0a77b4113172257fcdaa1c58e27a5074f Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 18 Nov 2008 14:02:03 +0800
Subject: [PATCH 4/4] cpuset: fix regression when failed to generate sched
 domains

Impact: properly rebuild sched-domains on kmalloc() failure

When cpuset failed to generate sched domains due to kmalloc()
failure, the scheduler should fallback to the single partition
'fallback_doms' and rebuild sched domains, but now it only
destroys but not rebuilds sched domains.

The regression was introduced by:

| commit dfb512ec4834116124da61d6c1ee10fd0aa32bd6
| Author: Max Krasnyansky <maxk@qualcomm.com>
| Date:   Fri Aug 29 13:11:41 2008 -0700
|
|    sched: arch_reinit_sched_domains() must destroy domains to force rebuild

After the above commit, partition_sched_domains(0, NULL, NULL) will
only destroy sched domains and partition_sched_domains(1, NULL, NULL)
will create the default sched domain.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/cpuset.c | 12 ++++++++----
 kernel/sched.c  | 13 +++++++------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e00526f52ec..81fc6791a296 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -587,7 +587,6 @@ static int generate_sched_domains(cpumask_t **domains,
 	int ndoms;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] cpumask_t slot */
 
-	ndoms = 0;
 	doms = NULL;
 	dattr = NULL;
 	csa = NULL;
@@ -674,10 +673,8 @@ restart:
 	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
 	 */
 	doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
-	if (!doms) {
-		ndoms = 0;
+	if (!doms)
 		goto done;
-	}
 
 	/*
 	 * The rest of the code, including the scheduler, can deal with
@@ -732,6 +729,13 @@ restart:
 done:
 	kfree(csa);
 
+	/*
+	 * Fallback to the default domain if kmalloc() failed.
+	 * See comments in partition_sched_domains().
+	 */
+	if (doms == NULL)
+		ndoms = 1;
+
 	*domains    = doms;
 	*attributes = dattr;
 	return ndoms;
diff --git a/kernel/sched.c b/kernel/sched.c
index c94baf2969e7..9b1e79371c20 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7789,13 +7789,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  *
  * The passed in 'doms_new' should be kmalloc'd. This routine takes
  * ownership of it and will kfree it when done with it. If the caller
- * failed the kmalloc call, then it can pass in doms_new == NULL,
- * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms', it also forces the domains to be rebuilt.
+ * failed the kmalloc call, then it can pass in doms_new == NULL &&
+ * ndoms_new == 1, and partition_sched_domains() will fallback to
+ * the single partition 'fallback_doms', it also forces the domains
+ * to be rebuilt.
  *
- * If doms_new==NULL it will be replaced with cpu_online_map.
- * ndoms_new==0 is a special case for destroying existing domains.
- * It will not create the default domain.
+ * If doms_new == NULL it will be replaced with cpu_online_map.
+ * ndoms_new == 0 is a special case for destroying existing domains,
+ * and it will not create the default domain.
  *
  * Call with hotplug lock held
  */