From fc47e7b592dc45b03c27b0a4c8f2d215dae9944d Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@o2.pl>
Date: Wed, 6 Sep 2006 00:03:39 -0700
Subject: [PATCH 01/21] [PATCH] lockdep ifdef fix

With

	CONFIG_SMP=y
	CONFIG_PREEMPT=y
	CONFIG_LOCKDEP=y
	CONFIG_DEBUG_LOCK_ALLOC=y
	# CONFIG_PROVE_LOCKING is not set

spin_unlock_irqrestore() goes through lockdep but spin_lock_irqsave() doesn't.
Apparently, bad things happen.

Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/spinlock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index bfd6ad9c0330..fb524b009eef 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(_write_trylock);
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
 #if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
-	defined(CONFIG_PROVE_LOCKING)
+	defined(CONFIG_DEBUG_LOCK_ALLOC)
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {

From 6dba28379edc08327ede01ff41bd3c9dd46a7fa0 Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwindheim.de>
Date: Wed, 6 Sep 2006 00:03:40 -0700
Subject: [PATCH 02/21] [PATCH] Documentation for lock_key in struct
 hrtimer_base

Fixes an error message on make xmldocs.

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hrtimer.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index e4bccbcc2750..4fc379de6c2f 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -80,6 +80,7 @@ struct hrtimer_sleeper {
  * @get_softirq_time:	function to retrieve the current time from the softirq
  * @curr_timer:		the timer which is executing a callback right now
  * @softirq_time:	the time when running the hrtimer queue in the softirq
+ * @lock_key:		the lock_class_key for use with lockdep
  */
 struct hrtimer_base {
 	clockid_t		index;

From fe2bbc4832659b7ffc867cac03e0a92ae81e11e4 Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwindheim.de>
Date: Wed, 6 Sep 2006 00:03:41 -0700
Subject: [PATCH 03/21] [PATCH] add missing desctiption in super.c

Adds kernel-doc for alloc_super() type in fs/super.c.

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/super.c b/fs/super.c
index 6d4e8174b6db..5c4c94d5495e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -49,6 +49,7 @@ DEFINE_SPINLOCK(sb_lock);
 
 /**
  *	alloc_super	-	create new superblock
+ *	@type:	filesystem type superblock should belong to
  *
  *	Allocates and initializes a new &struct super_block.  alloc_super()
  *	returns a pointer new superblock or %NULL if allocation had failed.

From 96dd7421a06a5bc6eb731323b95efcb2fd864854 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Sep 2006 00:03:42 -0700
Subject: [PATCH 04/21] [PATCH] prevent timespec/timeval to ktime_t overflow

Frank v.  Waveren pointed out that on 64bit machines the timespec to
ktime_t conversion might overflow.  This is also true for timeval to
ktime_t conversions.  This breaks a "sleep inf" on 64bit machines.

While a timespec/timeval with tx.sec = MAX_LONG is valid by specification
the internal representation of ktime_t is based on nanoseconds.  The
conversion of seconds to nanoseconds overflows for seconds values >=
(MAX_LONG / NSEC_PER_SEC).

Check the seconds argument to the conversion and limit it to the maximum
time which can be represented by ktime_t.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Frank v Waveren <fvw@var.cx>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ktime.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index ed3396dcc4f7..84eeecd60a02 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -56,7 +56,8 @@ typedef union {
 #endif
 } ktime_t;
 
-#define KTIME_MAX			(~((u64)1 << 63))
+#define KTIME_MAX			((s64)~((u64)1 << 63))
+#define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
 
 /*
  * ktime_t definitions when using the 64-bit scalar representation:
@@ -73,6 +74,10 @@ typedef union {
  */
 static inline ktime_t ktime_set(const long secs, const unsigned long nsecs)
 {
+#if (BITS_PER_LONG == 64)
+	if (unlikely(secs >= KTIME_SEC_MAX))
+		return (ktime_t){ .tv64 = KTIME_MAX };
+#endif
 	return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs };
 }
 

From 471b40d0dfc17bf0161629950b82524d41bc37ce Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 6 Sep 2006 00:03:43 -0700
Subject: [PATCH 05/21] [PATCH] prevent swsusp with PAE

PAE + swsusp results in hard-to-debug crash about 50% of time during
resume.  Cause is known, fix needs to be ported from x86-64 (but we can't
make it to 2.6.18, and I'd like this to be worked around in 2.6.18).

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/Kconfig | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ae44a70aae8a..619ecabf7c58 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -56,7 +56,7 @@ config PM_TRACE
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
-	depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
+	depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP))
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need ACPI or APM.
@@ -78,6 +78,10 @@ config SOFTWARE_SUSPEND
 
 	  For more information take a look at <file:Documentation/power/swsusp.txt>.
 
+	  (For now, swsusp is incompatible with PAE aka HIGHMEM_64G on i386.
+	  we need identity mapping for resume to work, and that is trivial
+	  to get with 4MB pages, but less than trivial on PAE).
+
 config PM_STD_PARTITION
 	string "Default resume partition"
 	depends on SOFTWARE_SUSPEND

From 068c4579fe5c21e84c7cb2ba89db80899e25104e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Sep 2006 00:03:44 -0700
Subject: [PATCH 06/21] [PATCH] lockdep: do not touch console state when
 tainting the kernel

Remove an unintended console_verbose() side-effect from add_taint().

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/panic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index 9b8dcfd1ca93..8010b9b17aca 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -173,7 +173,7 @@ const char *print_tainted(void)
 
 void add_taint(unsigned flag)
 {
-	debug_locks_off(); /* can't trust the integrity of the kernel anymore */
+	debug_locks = 0; /* can't trust the integrity of the kernel anymore */
 	tainted |= flag;
 }
 EXPORT_SYMBOL(add_taint);

From bb98ad77d8451a3ccf9478738ffe7ec63394fcdf Mon Sep 17 00:00:00 2001
From: Ismail Donmez <ismail@pardus.org.tr>
Date: Wed, 6 Sep 2006 00:03:44 -0700
Subject: [PATCH 07/21] [PATCH] Move linux/device.h include in linux/atmdev.h
 to #ifdef __KERNEL__ section

linux/device.h header is not included in the David Woodhouse's
kernel-headers git tree which is used for userspace kernel headers.  Which
results in compile errors when building iproute2.  Attached patch moves
linux/device.h include under the #ifdef __KERNEL__ section.

Signed-off-by: Ismail Donmez <ismail@pardus.org.tr>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/atmdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 41788a31c438..2096e5c72827 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -7,7 +7,6 @@
 #define LINUX_ATMDEV_H
 
 
-#include <linux/device.h>
 #include <linux/atmapi.h>
 #include <linux/atm.h>
 #include <linux/atmioc.h>
@@ -210,6 +209,7 @@ struct atm_cirange {
 
 #ifdef __KERNEL__
 
+#include <linux/device.h>
 #include <linux/wait.h> /* wait_queue_head_t */
 #include <linux/time.h> /* struct timeval */
 #include <linux/net.h>

From b4a228346c1a7d09f565e750d2e988c5671e9fa3 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Wed, 6 Sep 2006 09:03:26 -0700
Subject: [PATCH 08/21] [PATCH] Remove unneeded asm-i386/cpufeature.h from user
 visibility.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/Kbuild | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild
index c064a8e9170f..335b2fa4e066 100644
--- a/include/asm-i386/Kbuild
+++ b/include/asm-i386/Kbuild
@@ -1,5 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += boot.h cpufeature.h debugreg.h ldt.h setup.h ucontext.h
+header-y += boot.h debugreg.h ldt.h setup.h ucontext.h
 
 unifdef-y += mtrr.h vm86.h

From ebd6c17109aed086908ae3b0949265fd07712659 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Wed, 6 Sep 2006 11:02:45 +0100
Subject: [PATCH 09/21] [PATCH] FRV: Use the generic time stuff for FRV

Use the generic time stuff for FRV.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/frv/Kconfig       |  4 +++
 arch/frv/kernel/time.c | 81 ------------------------------------------
 2 files changed, 4 insertions(+), 81 deletions(-)

diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 95a3892b8d1b..a601a17cf568 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -29,6 +29,10 @@ config GENERIC_HARDIRQS
 	bool
 	default n
 
+config GENERIC_TIME
+	bool
+	default y
+
 config TIME_LOW_RES
 	bool
 	default y
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index d5b64e193d92..68a77fe3bb40 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -32,8 +32,6 @@
 
 #define TICK_SIZE (tick_nsec / 1000)
 
-extern unsigned long wall_jiffies;
-
 unsigned long __nongprelbss __clkin_clock_speed_HZ;
 unsigned long __nongprelbss __ext_bus_clock_speed_HZ;
 unsigned long __nongprelbss __res_bus_clock_speed_HZ;
@@ -144,85 +142,6 @@ void time_init(void)
 	time_divisor_init();
 }
 
-/*
- * This version of gettimeofday has near microsecond resolution.
- */
-void do_gettimeofday(struct timeval *tv)
-{
-	unsigned long seq;
-	unsigned long usec, sec;
-	unsigned long max_ntp_tick;
-
-	do {
-		unsigned long lost;
-
-		seq = read_seqbegin(&xtime_lock);
-
-		usec = 0;
-		lost = jiffies - wall_jiffies;
-
-		/*
-		 * If time_adjust is negative then NTP is slowing the clock
-		 * so make sure not to go into next possible interval.
-		 * Better to lose some accuracy than have time go backwards..
-		 */
-		if (unlikely(time_adjust < 0)) {
-			max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
-			usec = min(usec, max_ntp_tick);
-
-			if (lost)
-				usec += lost * max_ntp_tick;
-		}
-		else if (unlikely(lost))
-			usec += lost * (USEC_PER_SEC / HZ);
-
-		sec = xtime.tv_sec;
-		usec += (xtime.tv_nsec / 1000);
-	} while (read_seqretry(&xtime_lock, seq));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= 0 * NSEC_PER_USEC;
-	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-	return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
 /*
  * Scheduler clock - returns current time in nanosec units.
  */

From 3a459756810912d2c2bf188cef566af255936b4d Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@openvz.org>
Date: Thu, 7 Sep 2006 14:17:04 +0400
Subject: [PATCH 10/21] [PATCH] IA64,sparc: local DoS with corrupted ELFs

This prevents cross-region mappings on IA64 and SPARC which could lead
to system crash.  They were correctly trapped for normal mmap() calls,
but not for the kernel internal calls generated by executable loading.

This code just moves the architecture-specific cross-region checks into
an arch-specific "arch_mmap_check()" macro, and defines that for the
architectures that needed it (ia64, sparc and sparc64).

Architectures that don't have any special requirements can just ignore
the new cross-region check, since the mmap() code will just notice on
its own when the macro isn't defined.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
[ Cleaned up to not affect architectures that don't need it ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ia64/kernel/sys_ia64.c     | 28 ++++++++++++++-----------
 arch/sparc/kernel/sys_sparc.c   | 27 ++++++++++++++-----------
 arch/sparc64/kernel/sys_sparc.c | 36 ++++++++++++++++++---------------
 include/asm-ia64/mman.h         |  8 ++++++++
 include/asm-sparc/mman.h        |  8 ++++++++
 include/asm-sparc64/mman.h      |  8 ++++++++
 mm/mmap.c                       | 17 ++++++++++++++--
 7 files changed, 90 insertions(+), 42 deletions(-)

diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 40722d88607a..9ef62a3fbfad 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -163,10 +163,25 @@ sys_pipe (void)
 	return retval;
 }
 
+int ia64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags)
+{
+	unsigned long roff;
+
+	/*
+	 * Don't permit mappings into unmapped space, the virtual page table
+	 * of a region, or across a region boundary.  Note: RGN_MAP_LIMIT is
+	 * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0.
+	 */
+	roff = REGION_OFFSET(addr);
+	if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len)))
+		return -EINVAL;
+	return 0;
+}
+
 static inline unsigned long
 do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
 {
-	unsigned long roff;
 	struct file *file = NULL;
 
 	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
@@ -188,17 +203,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un
 		goto out;
 	}
 
-	/*
-	 * Don't permit mappings into unmapped space, the virtual page table of a region,
-	 * or across a region boundary.  Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE
-	 * (for some integer n <= 61) and len > 0.
-	 */
-	roff = REGION_OFFSET(addr);
-	if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) {
-		addr = -EINVAL;
-		goto out;
-	}
-
 	down_write(&current->mm->mmap_sem);
 	addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 	up_write(&current->mm->mmap_sem);
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index a41c8a5c2007..94ff58c9d4a9 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -219,6 +219,21 @@ out:
 	return err;
 }
 
+int sparc_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
+{
+	if (ARCH_SUN4C_SUN4 &&
+	    (len > 0x20000000 ||
+	     ((flags & MAP_FIXED) &&
+	      addr < 0xe0000000 && addr + len > 0x20000000)))
+		return -EINVAL;
+
+	/* See asm-sparc/uaccess.h */
+	if (len > TASK_SIZE - PAGE_SIZE || addr + len > TASK_SIZE - PAGE_SIZE)
+		return -EINVAL;
+
+	return 0;
+}
+
 /* Linux version of mmap */
 static unsigned long do_mmap2(unsigned long addr, unsigned long len,
 	unsigned long prot, unsigned long flags, unsigned long fd,
@@ -233,25 +248,13 @@ static unsigned long do_mmap2(unsigned long addr, unsigned long len,
 			goto out;
 	}
 
-	retval = -EINVAL;
 	len = PAGE_ALIGN(len);
-	if (ARCH_SUN4C_SUN4 &&
-	    (len > 0x20000000 ||
-	     ((flags & MAP_FIXED) &&
-	      addr < 0xe0000000 && addr + len > 0x20000000)))
-		goto out_putf;
-
-	/* See asm-sparc/uaccess.h */
-	if (len > TASK_SIZE - PAGE_SIZE || addr + len > TASK_SIZE - PAGE_SIZE)
-		goto out_putf;
-
 	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
 	down_write(&current->mm->mmap_sem);
 	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 	up_write(&current->mm->mmap_sem);
 
-out_putf:
 	if (file)
 		fput(file);
 out:
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index 054d0abdb7ee..bf5f14ee73de 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -548,6 +548,26 @@ asmlinkage long sparc64_personality(unsigned long personality)
 	return ret;
 }
 
+int sparc64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags)
+{
+	if (test_thread_flag(TIF_32BIT)) {
+		if (len >= STACK_TOP32)
+			return -EINVAL;
+
+		if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
+			return -EINVAL;
+	} else {
+		if (len >= VA_EXCLUDE_START)
+			return -EINVAL;
+
+		if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* Linux version of mmap */
 asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
 	unsigned long prot, unsigned long flags, unsigned long fd,
@@ -563,27 +583,11 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
 	}
 	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 	len = PAGE_ALIGN(len);
-	retval = -EINVAL;
-
-	if (test_thread_flag(TIF_32BIT)) {
-		if (len >= STACK_TOP32)
-			goto out_putf;
-
-		if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
-			goto out_putf;
-	} else {
-		if (len >= VA_EXCLUDE_START)
-			goto out_putf;
-
-		if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
-			goto out_putf;
-	}
 
 	down_write(&current->mm->mmap_sem);
 	retval = do_mmap(file, addr, len, prot, flags, off);
 	up_write(&current->mm->mmap_sem);
 
-out_putf:
 	if (file)
 		fput(file);
 out:
diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h
index 6ba179f12718..c73b87832a1e 100644
--- a/include/asm-ia64/mman.h
+++ b/include/asm-ia64/mman.h
@@ -22,4 +22,12 @@
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#define arch_mmap_check	ia64_mmap_check
+int ia64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags);
+#endif
+#endif
+
 #endif /* _ASM_IA64_MMAN_H */
diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h
index 88d1886abf3b..b7dc40bc68f4 100644
--- a/include/asm-sparc/mman.h
+++ b/include/asm-sparc/mman.h
@@ -35,4 +35,12 @@
 
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
 
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#define arch_mmap_check	sparc_mmap_check
+int sparc_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags);
+#endif
+#endif
+
 #endif /* __SPARC_MMAN_H__ */
diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h
index 6fd878e61435..8cc1860be630 100644
--- a/include/asm-sparc64/mman.h
+++ b/include/asm-sparc64/mman.h
@@ -35,4 +35,12 @@
 
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
 
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#define arch_mmap_check	sparc64_mmap_check
+int sparc64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags);
+#endif
+#endif
+
 #endif /* __SPARC64_MMAN_H__ */
diff --git a/mm/mmap.c b/mm/mmap.c
index c1868ecdbc5f..e66a0b524aff 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -30,6 +30,10 @@
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 
+#ifndef arch_mmap_check
+#define arch_mmap_check(addr, len, flags)	(0)
+#endif
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
@@ -913,6 +917,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
 	if (!len)
 		return -EINVAL;
 
+	error = arch_mmap_check(addr, len, flags);
+	if (error)
+		return error;
+
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
 	if (!len || len > TASK_SIZE)
@@ -1859,6 +1867,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	unsigned long flags;
 	struct rb_node ** rb_link, * rb_parent;
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
+	int error;
 
 	len = PAGE_ALIGN(len);
 	if (!len)
@@ -1867,6 +1876,12 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
 		return -EINVAL;
 
+	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
+	error = arch_mmap_check(addr, len, flags);
+	if (error)
+		return error;
+
 	/*
 	 * mlock MCL_FUTURE?
 	 */
@@ -1907,8 +1922,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if (security_vm_enough_memory(len >> PAGE_SHIFT))
 		return -ENOMEM;
 
-	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
 					NULL, NULL, pgoff, NULL))

From c5780e976e19faff345fcef4a01db87108b51a44 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 8 Sep 2006 09:47:15 -0700
Subject: [PATCH 11/21] [PATCH] Use the correct restart option for
 futex_lock_pi

The current implementation of futex_lock_pi returns -ERESTART_RESTARTBLOCK
in case that the lock operation has been interrupted by a signal.  This
results in a return of -EINTR to userspace in case there is an handler for
the signal.  This is wrong, because userspace expects that the lock
function does not return in any case of signal delivery.

This was not caught by my insufficient test case, but triggered a nasty
userspace problem in an high load application scenario.  Unfortunately also
glibc does not check for this invalid return value.

Using -ERSTARTNOINTR makes sure, that the interrupted syscall is restarted.
 The restart block related code can be safely removed, as the possible
timeout argument is an absolute time value.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/futex.c | 84 +++++++-------------------------------------------
 1 file changed, 11 insertions(+), 73 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index b9b8aea5389e..9d260e838cff 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1120,9 +1120,10 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
-			    struct hrtimer_sleeper *to)
+static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
+			 long nsec, int trylock)
 {
+	struct hrtimer_sleeper timeout, *to = NULL;
 	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
@@ -1132,6 +1133,13 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	if (refill_pi_state_cache())
 		return -ENOMEM;
 
+	if (sec != MAX_SCHEDULE_TIMEOUT) {
+		to = &timeout;
+		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
+		hrtimer_init_sleeper(to, current);
+		to->timer.expires = ktime_set(sec, nsec);
+	}
+
 	q.pi_state = NULL;
  retry:
 	down_read(&curr->mm->mmap_sem);
@@ -1307,7 +1315,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	if (!detect && ret == -EDEADLK && 0)
 		force_sig(SIGKILL, current);
 
-	return ret;
+	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
  out_unlock_release_sem:
 	queue_unlock(&q, hb);
@@ -1341,76 +1349,6 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	return ret;
 }
 
-/*
- * Restart handler
- */
-static long futex_lock_pi_restart(struct restart_block *restart)
-{
-	struct hrtimer_sleeper timeout, *to = NULL;
-	int ret;
-
-	restart->fn = do_no_restart_syscall;
-
-	if (restart->arg2 || restart->arg3) {
-		to = &timeout;
-		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
-		hrtimer_init_sleeper(to, current);
-		to->timer.expires.tv64 = ((u64)restart->arg1 << 32) |
-			(u64) restart->arg0;
-	}
-
-	pr_debug("lock_pi restart: %p, %d (%d)\n",
-		 (u32 __user *)restart->arg0, current->pid);
-
-	ret = do_futex_lock_pi((u32 __user *)restart->arg0, restart->arg1,
-			       0, to);
-
-	if (ret != -EINTR)
-		return ret;
-
-	restart->fn = futex_lock_pi_restart;
-
-	/* The other values are filled in */
-	return -ERESTART_RESTARTBLOCK;
-}
-
-/*
- * Called from the syscall entry below.
- */
-static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
-			 long nsec, int trylock)
-{
-	struct hrtimer_sleeper timeout, *to = NULL;
-	struct restart_block *restart;
-	int ret;
-
-	if (sec != MAX_SCHEDULE_TIMEOUT) {
-		to = &timeout;
-		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
-		hrtimer_init_sleeper(to, current);
-		to->timer.expires = ktime_set(sec, nsec);
-	}
-
-	ret = do_futex_lock_pi(uaddr, detect, trylock, to);
-
-	if (ret != -EINTR)
-		return ret;
-
-	pr_debug("lock_pi interrupted: %p, %d (%d)\n", uaddr, current->pid);
-
-	restart = &current_thread_info()->restart_block;
-	restart->fn = futex_lock_pi_restart;
-	restart->arg0 = (unsigned long) uaddr;
-	restart->arg1 = detect;
-	if (to) {
-		restart->arg2 = to->timer.expires.tv64 & 0xFFFFFFFF;
-		restart->arg3 = to->timer.expires.tv64 >> 32;
-	} else
-		restart->arg2 = restart->arg3 = 0;
-
-	return -ERESTART_RESTARTBLOCK;
-}
-
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
  * This is the in-kernel slowpath: we look up the PI state (if any),

From 4495c9e5cabc82bb4ce930eb5d3dc7544f3f8389 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 8 Sep 2006 09:47:24 -0700
Subject: [PATCH 12/21] [PATCH] optical /proc/ide/*/media

Sergey Vlasov reported that his "FUJITSU MCC3064AP, ATAPI OPTICAL drive"
pops up as UNKNOWN in /proc/ide/*/media .

Closes #4145.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/ide/ide-proc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index c12f1b71e934..41b74b13a00c 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -376,6 +376,8 @@ static int proc_ide_read_media
 				break;
 		case ide_floppy:media = "floppy\n";
 				break;
+		case ide_optical:media = "optical\n";
+				break;
 		default:	media = "UNKNOWN\n";
 				break;
 	}

From 67bb2c692cc02e53c23e4debc92c3a79ddc52a8c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 8 Sep 2006 09:47:34 -0700
Subject: [PATCH 13/21] [PATCH] sh: fix FPN_START typo

Not that it passes allmodconfig without it...

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Cc: Mark Haverkamp <markh@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-sh/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index a5559e38744e..5a057b00f19a 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -104,7 +104,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 /* PFN start number, because of __MEMORY_START */
 #define PFN_START		(__MEMORY_START >> PAGE_SHIFT)
-#define ARCH_PFN_OFFSET		(FPN_START)
+#define ARCH_PFN_OFFSET		(PFN_START)
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_valid(pfn)		(((pfn) - PFN_START) < max_mapnr)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

From 47d4b9066df023670a61e74565a75293cf15a441 Mon Sep 17 00:00:00 2001
From: David Wang <touch@sis.com>
Date: Fri, 8 Sep 2006 09:47:51 -0700
Subject: [PATCH 14/21] [PATCH] sis5513: add SiS south bridge ID 0x966 and
 0x968

New SiS south bridge device ID is 0x966.

Next coming product will be 0x968. (Will be released in Q4, this year)

We don't make any updates to the IDE controller.

Signed-off-by: David Wang <touch@sis.com>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/ide/pci/sis5513.c | 2 ++
 include/linux/pci_ids.h   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 8a6c23ac8cc1..f03196c5db37 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -86,6 +86,8 @@ static const struct {
 	u8 chipset_family;
 	u8 flags;
 } SiSHostChipInfo[] = {
+	{ "SiS968",	PCI_DEVICE_ID_SI_968,	ATA_133  },
+	{ "SiS966",	PCI_DEVICE_ID_SI_966,	ATA_133  },
 	{ "SiS965",	PCI_DEVICE_ID_SI_965,	ATA_133  },
 	{ "SiS745",	PCI_DEVICE_ID_SI_745,	ATA_100  },
 	{ "SiS735",	PCI_DEVICE_ID_SI_735,	ATA_100  },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c91164ea3dec..7a249155ee4e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -648,6 +648,8 @@
 #define PCI_DEVICE_ID_SI_962		0x0962
 #define PCI_DEVICE_ID_SI_963		0x0963
 #define PCI_DEVICE_ID_SI_965		0x0965
+#define PCI_DEVICE_ID_SI_966		0x0966
+#define PCI_DEVICE_ID_SI_968		0x0968
 #define PCI_DEVICE_ID_SI_5511		0x5511
 #define PCI_DEVICE_ID_SI_5513		0x5513
 #define PCI_DEVICE_ID_SI_5517		0x5517

From 3665d0e58fa44f50c744f85c7e8ad21d5b10e206 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Fri, 8 Sep 2006 09:48:21 -0700
Subject: [PATCH 15/21] [PATCH] ext3_getblk() should handle HOLE correctly

It has been reported that ext3_getblk() is not doing the right thing and
triggering following WARN():

BUG: warning at fs/ext3/inode.c:1016/ext3_getblk()
 <c01c5140> ext3_getblk+0x98/0x2a6  <c03b2806> md_wakeup_thread+0x26/0x2a
 <c01c536d> ext3_bread+0x1f/0x88  <c01cedf9> ext3_quota_read+0x136/0x1ae
 <c018b683> v1_read_dqblk+0x61/0xac  <c0188f32> dquot_acquire+0xf6/0x107
 <c01ceaba> ext3_acquire_dquot+0x46/0x68  <c01897d4> dqget+0x155/0x1e7
 <c018a97b> dquot_transfer+0x3e0/0x3e9  <c016fe52> dput+0x23/0x13e
 <c01c7986> ext3_setattr+0xc3/0x240  <c0120f66> current_fs_time+0x52/0x6a
 <c017320e> notify_change+0x2bd/0x30d  <c0159246> chown_common+0x9c/0xc5
 <c02a222c> strncpy_from_user+0x3b/0x68  <c0167fe6> do_path_lookup+0xdf/0x266
 <c016841b> __user_walk_fd+0x44/0x5a  <c01592b9> sys_chown+0x4a/0x55
 <c015a43c> vfs_write+0xe7/0x13c  <c01695d4> sys_mkdir+0x1f/0x23
 <c0102a97> syscall_call+0x7/0xb

Looking at the code, it looks like it's not handle HOLE correctly.  It ends
up returning -EIO.  Here is the patch to fix it.

If we really want to be paranoid, we can allow return values 0 (HOLE), 1
(we asked for one block) and return -EIO for more than 1 block.  But I
really don't see a reason for doing it - all we need is the block# here.
(doesn't matter how many blocks are mapped).

ext3_get_blocks_handle() returns number of blocks it mapped.  It returns 0
in case of HOLE.  ext3_getblk() should handle HOLE properly (currently its
dumping warning stack and returning -EIO).

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c5ee9f0691e3..0f0b1eadb98d 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1009,11 +1009,14 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
 	buffer_trace_init(&dummy.b_history);
 	err = ext3_get_blocks_handle(handle, inode, block, 1,
 					&dummy, create, 1);
-	if (err == 1) {
+	/*
+	 * ext3_get_blocks_handle() returns number of blocks
+	 * mapped. 0 in case of a HOLE.
+	 */
+	if (err > 0) {
+		if (err > 1)
+			WARN_ON(1);
 		err = 0;
-	} else if (err >= 0) {
-		WARN_ON(1);
-		err = -EIO;
 	}
 	*errp = err;
 	if (!err && buffer_mapped(&dummy)) {

From 016eb4a0ed06a3677d67a584da901f0e9a63c666 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 8 Sep 2006 09:48:38 -0700
Subject: [PATCH 16/21] [PATCH] invalidate_complete_page() race fix

If a CPU faults this page into pagetables after invalidate_mapping_pages()
checked page_mapped(), invalidate_complete_page() will still proceed to remove
the page from pagecache.  This leaves the page-faulting process with a
detached page.  If it was MAP_SHARED then file data loss will ensue.

Fix that up by checking the page's refcount after taking tree_lock.

Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/truncate.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index cf1b015df4a7..c6ab55ec6883 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -68,10 +68,10 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
 		return 0;
 
 	write_lock_irq(&mapping->tree_lock);
-	if (PageDirty(page)) {
-		write_unlock_irq(&mapping->tree_lock);
-		return 0;
-	}
+	if (PageDirty(page))
+		goto failed;
+	if (page_count(page) != 2)	/* caller's ref + pagecache ref */
+		goto failed;
 
 	BUG_ON(PagePrivate(page));
 	__remove_from_page_cache(page);
@@ -79,6 +79,9 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
 	ClearPageUptodate(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
+failed:
+	write_unlock_irq(&mapping->tree_lock);
+	return 0;
 }
 
 /**

From e9f7bee1df223dcf83743b46cb06c08d95497ec0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 8 Sep 2006 09:48:54 -0700
Subject: [PATCH 17/21] [PATCH] NFS: large non-page-aligned direct I/O clobbers
 memory

The logic in nfs_direct_read_schedule and nfs_direct_write_schedule can
allow data->npages to be one larger than rpages.  This causes a page
pointer to be written beyond the end of the pagevec in nfs_read_data (or
nfs_write_data).

Fix this by making nfs_(read|write)_alloc() calculate the size of the
pagevec array, and initialise data->npages.

Also get rid of the redundant argument to nfs_commit_alloc().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/direct.c         | 50 ++++++++++++-----------------------------
 fs/nfs/read.c           | 24 +++++++++++---------
 fs/nfs/write.c          | 37 +++++++++++++-----------------
 include/linux/nfs_fs.h  |  6 ++---
 include/linux/nfs_xdr.h |  4 ++--
 5 files changed, 47 insertions(+), 74 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fecd3b095deb..76ca1cbc38f9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 	return atomic_dec_and_test(&dreq->io_count);
 }
 
-/*
- * "size" is never larger than rsize or wsize.
- */
-static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
-{
-	int page_count;
-
-	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	page_count -= user_addr >> PAGE_SHIFT;
-	BUG_ON(page_count < 0);
-
-	return page_count;
-}
-
-static inline unsigned int nfs_max_pages(unsigned int size)
-{
-	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-}
-
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int rpages = nfs_max_pages(rsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
 
 	get_dreq(dreq);
 
-	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
 		size_t bytes;
 
+		pgbase = user_addr & ~PAGE_MASK;
+		bytes = min(rsize,count);
+
 		result = -ENOMEM;
-		data = nfs_readdata_alloc(rpages);
+		data = nfs_readdata_alloc(pgbase + bytes);
 		if (unlikely(!data))
 			break;
 
-		bytes = rsize;
-		if (count < rsize)
-			bytes = count;
-
-		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 1, 0, data->pagevec, NULL);
@@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 		started += bytes;
 		user_addr += bytes;
 		pos += bytes;
+		/* FIXME: Remove this unnecessary math from final patch */
 		pgbase += bytes;
 		pgbase &= ~PAGE_MASK;
+		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
 		count -= bytes;
 	} while (count != 0);
@@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 
 static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
 {
-	dreq->commit_data = nfs_commit_alloc(0);
+	dreq->commit_data = nfs_commit_alloc();
 	if (dreq->commit_data != NULL)
 		dreq->commit_data->req = (struct nfs_page *) dreq;
 }
@@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int wpages = nfs_max_pages(wsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
 
 	get_dreq(dreq);
 
-	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_write_data *data;
 		size_t bytes;
 
+		pgbase = user_addr & ~PAGE_MASK;
+		bytes = min(wsize,count);
+
 		result = -ENOMEM;
-		data = nfs_writedata_alloc(wpages);
+		data = nfs_writedata_alloc(pgbase + bytes);
 		if (unlikely(!data))
 			break;
 
-		bytes = wsize;
-		if (count < wsize)
-			bytes = count;
-
-		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 0, 0, data->pagevec, NULL);
@@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 		started += bytes;
 		user_addr += bytes;
 		pos += bytes;
+
+		/* FIXME: Remove this useless math from the final patch */
 		pgbase += bytes;
 		pgbase &= ~PAGE_MASK;
+		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
 		count -= bytes;
 	} while (count != 0);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index da9cf11c326f..7a9ee00e0c61 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_data *nfs_readdata_alloc(size_t len)
 {
+	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		p->npages = pagecount;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
@@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	int		result;
 	struct nfs_read_data *rdata;
 
-	rdata = nfs_readdata_alloc(1);
+	rdata = nfs_readdata_alloc(count);
 	if (!rdata)
 		return -ENOMEM;
 
@@ -336,25 +338,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
 	struct nfs_page *req = nfs_list_entry(head->next);
 	struct page *page = req->wb_page;
 	struct nfs_read_data *data;
-	unsigned int rsize = NFS_SERVER(inode)->rsize;
-	unsigned int nbytes, offset;
+	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+	unsigned int offset;
 	int requests = 0;
 	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
 
 	nbytes = req->wb_bytes;
-	for(;;) {
-		data = nfs_readdata_alloc(1);
+	do {
+		size_t len = min(nbytes,rsize);
+
+		data = nfs_readdata_alloc(len);
 		if (!data)
 			goto out_bad;
 		INIT_LIST_HEAD(&data->pages);
 		list_add(&data->pages, &list);
 		requests++;
-		if (nbytes <= rsize)
-			break;
-		nbytes -= rsize;
-	}
+		nbytes -= len;
+	} while(nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
 
 	ClearPageError(page);
@@ -402,7 +404,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
 		return nfs_pagein_multi(head, inode);
 
-	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
+	data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
 	if (!data)
 		goto out_bad;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 50774991f8d5..8ab3cf10d792 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(void)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
-		if (pagecount <= ARRAY_SIZE(p->page_array))
-			p->pagevec = p->page_array;
-		else {
-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-			if (!p->pagevec) {
-				mempool_free(p, nfs_commit_mempool);
-				p = NULL;
-			}
-		}
 	}
 	return p;
 }
@@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p)
 	mempool_free(p, nfs_commit_mempool);
 }
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
+	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		p->npages = pagecount;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
@@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	int		result, written = 0;
 	struct nfs_write_data *wdata;
 
-	wdata = nfs_writedata_alloc(1);
+	wdata = nfs_writedata_alloc(wsize);
 	if (!wdata)
 		return -ENOMEM;
 
@@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
 	struct nfs_page *req = nfs_list_entry(head->next);
 	struct page *page = req->wb_page;
 	struct nfs_write_data *data;
-	unsigned int wsize = NFS_SERVER(inode)->wsize;
-	unsigned int nbytes, offset;
+	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
+	unsigned int offset;
 	int requests = 0;
 	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
 
 	nbytes = req->wb_bytes;
-	for (;;) {
-		data = nfs_writedata_alloc(1);
+	do {
+		size_t len = min(nbytes, wsize);
+
+		data = nfs_writedata_alloc(len);
 		if (!data)
 			goto out_bad;
 		list_add(&data->pages, &list);
 		requests++;
-		if (nbytes <= wsize)
-			break;
-		nbytes -= wsize;
-	}
+		nbytes -= len;
+	} while (nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
 
 	ClearPageError(page);
@@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 	struct nfs_write_data	*data;
 	unsigned int		count;
 
-	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
+	data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
 	if (!data)
 		goto out_bad;
 
@@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 	struct nfs_write_data	*data;
 	struct nfs_page         *req;
 
-	data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
+	data = nfs_commit_alloc();
 
 	if (!data)
 		goto out_bad;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 247434553ae8..530b1e6173b1 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -427,7 +427,7 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 extern void nfs_writedata_release(void *);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+struct nfs_write_data *nfs_commit_alloc(void);
 void nfs_commit_free(struct nfs_write_data *p);
 #endif
 
@@ -478,7 +478,7 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
 /*
  * Allocate nfs_write_data structures
  */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern struct nfs_write_data *nfs_writedata_alloc(size_t len);
 
 /*
  * linux/fs/nfs/read.c
@@ -492,7 +492,7 @@ extern void nfs_readdata_release(void *data);
 /*
  * Allocate nfs_read_data structures
  */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern struct nfs_read_data *nfs_readdata_alloc(size_t len);
 
 /*
  * linux/fs/nfs3proc.c
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index db9cbf68e12b..41e5a19199e9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -729,7 +729,7 @@ struct nfs_read_data {
 	struct list_head	pages;	/* Coalesced read requests */
 	struct nfs_page		*req;	/* multi ops per nfs_page */
 	struct page		**pagevec;
-	unsigned int		npages;	/* active pages in pagevec */
+	unsigned int		npages;	/* Max length of pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 #ifdef CONFIG_NFS_V4
@@ -748,7 +748,7 @@ struct nfs_write_data {
 	struct list_head	pages;		/* Coalesced requests we wish to flush */
 	struct nfs_page		*req;		/* multi ops per nfs_page */
 	struct page		**pagevec;
-	unsigned int		npages;		/* active pages in pagevec */
+	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 #ifdef CONFIG_NFS_V4

From b8444d00762703e1b6146fce12ce2684885f8bf6 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@hpl.hp.com>
Date: Fri, 25 Aug 2006 14:00:19 -0700
Subject: [PATCH 18/21] [IA64] correct file descriptor reference counting in
 perfmon

Fix a bug in sys_perfmonctl() whereby it was not correctly
decrementing the file descriptor reference count.

Signed-off-by: stephane eranian <eranian@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/perfmon.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index c7ccd6ee1ddf..84a7e52f56f6 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4936,13 +4936,15 @@ abort_locked:
 	if (likely(ctx)) {
 		DPRINT(("context unlocked\n"));
 		UNPROTECT_CTX(ctx, flags);
-		fput(file);
 	}
 
 	/* copy argument back to user, if needed */
 	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
 
 error_args:
+	if (file)
+		fput(file);
+
 	kfree(args_k);
 
 	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));

From 2636255488484e04d6d54303d2b0ec30f7ef7e02 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Mon, 4 Sep 2006 21:56:09 +0200
Subject: [PATCH 19/21] [IA64] Unwire set/get_robust_list

The syscalls set/get_robust_list must not be wired up until
futex_atomic_cmpxchg_inatomic is implemented.  Otherwise the kernel will
hang in handle_futex_death.

Signed-off-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/entry.S  | 4 ++--
 include/asm-ia64/unistd.h | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 12701cf32d99..fef06571be99 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1605,8 +1605,8 @@ sys_call_table:
 	data8 sys_ni_syscall			// 1295 reserved for ppoll
 	data8 sys_unshare
 	data8 sys_splice
-	data8 sys_set_robust_list
-	data8 sys_get_robust_list
+	data8 sys_ni_syscall			// reserved for set_robust_list
+	data8 sys_ni_syscall			// reserved for get_robust_list
 	data8 sys_sync_file_range		// 1300
 	data8 sys_tee
 	data8 sys_vmsplice
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index bb0eb727dcd0..f581662c5ab8 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -286,8 +286,7 @@
 /* 1294, 1295 reserved for pselect/ppoll */
 #define __NR_unshare			1296
 #define __NR_splice			1297
-#define __NR_set_robust_list		1298
-#define __NR_get_robust_list		1299
+/* 1298, 1299 reserved for set_robust_list/get_robust_list */
 #define __NR_sync_file_range		1300
 #define __NR_tee			1301
 #define __NR_vmsplice			1302

From 1c7d67073e2d196597f541351bc9b109c8a93528 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Thu, 31 Aug 2006 11:34:47 -0500
Subject: [PATCH 20/21] [IA64] Save register stack contents on cpu start

The SN PROM uses the register stack in the slave loop. The contents
must be preserved for the OS to return to the slave loop via offlining
a cpu or for kexec. A 'flushrs" is needed to force the stack to be written
to memory prior to changing bspstore.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/head.S | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 29236f0c62b5..44d540efa6d1 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -197,6 +197,11 @@ start_ap:
 	;;
 	srlz.i
 	;;
+ {
+	flushrs				// must be first insn in group
+	srlz.i
+ }
+	;;
 	/*
 	 * Save the region registers, predicate before they get clobbered
 	 */

From 38f5745c5a90641079fd5b48600ae63f7ab6edcd Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Thu, 31 Aug 2006 11:35:57 -0500
Subject: [PATCH 21/21] [IA64] SN fix for cpu hotplug/kexec

The sn_cpu_init() is required for cpu initialization on SN platforms.
Change __init to __cpuinit so that the function is not freed with init code/data.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index c119e8b620de..5f2dcba7fa8d 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -565,7 +565,7 @@ static void __init sn_init_pdas(char **cmdline_p)
  * Also sets up a few fields in the nodepda.  Also known as
  * platform_cpu_init() by the ia64 machvec code.
  */
-void __init sn_cpu_init(void)
+void __cpuinit sn_cpu_init(void)
 {
 	int cpuid;
 	int cpuphyid;